xref: /openbmc/linux/arch/x86/platform/uv/uv_nmi.c (revision 9fb29c73)
1 /*
2  * SGI NMI support routines
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, write to the Free Software
16  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
17  *
18  *  Copyright (c) 2009-2013 Silicon Graphics, Inc.  All Rights Reserved.
19  *  Copyright (c) Mike Travis
20  */
21 
22 #include <linux/cpu.h>
23 #include <linux/delay.h>
24 #include <linux/kdb.h>
25 #include <linux/kexec.h>
26 #include <linux/kgdb.h>
27 #include <linux/moduleparam.h>
28 #include <linux/nmi.h>
29 #include <linux/sched.h>
30 #include <linux/sched/debug.h>
31 #include <linux/slab.h>
32 #include <linux/clocksource.h>
33 
34 #include <asm/apic.h>
35 #include <asm/current.h>
36 #include <asm/kdebug.h>
37 #include <asm/local64.h>
38 #include <asm/nmi.h>
39 #include <asm/traps.h>
40 #include <asm/uv/uv.h>
41 #include <asm/uv/uv_hub.h>
42 #include <asm/uv/uv_mmrs.h>
43 
44 /*
45  * UV handler for NMI
46  *
47  * Handle system-wide NMI events generated by the global 'power nmi' command.
48  *
49  * Basic operation is to field the NMI interrupt on each CPU and wait
50  * until all CPUs have arrived in the NMI handler.  If some CPUs do not
51  * make it into the handler, try to force them in with the IPI(NMI) signal.
52  *
53  * We also have to minimize UV Hub MMR accesses as much as possible, as these
54  * accesses disrupt the UV Hub's primary mission of directing NumaLink traffic
55  * and can cause system problems.
56  *
57  * To do this we register our primary NMI notifier on the NMI_UNKNOWN
58  * chain.  This reduces the number of false NMI calls when the perf
59  * tools are running, since they generate an enormous number of NMIs per
60  * second (~4M/s for 1024 CPU threads).  Our secondary NMI handler is
61  * very short, as it only checks whether it has been "pinged" with the
62  * IPI(NMI) signal as mentioned above, and does not read the UV Hub's MMR.
63  *
64  */
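/*
 * Terminology used throughout this file: the first CPU to field the NMI
 * becomes the "master" (recorded in uv_nmi_cpu); all other CPUs are
 * "slaves" that spin until the master releases them via
 * uv_nmi_slave_continue.
 */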
65 
66 static struct uv_hub_nmi_s **uv_hub_nmi_list;
67 
68 DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
69 
70 /* UV hubless values */
71 #define NMI_CONTROL_PORT	0x70
72 #define NMI_DUMMY_PORT		0x71
73 #define PAD_OWN_GPP_D_0		0x2c
74 #define GPI_NMI_STS_GPP_D_0	0x164
75 #define GPI_NMI_ENA_GPP_D_0	0x174
76 #define STS_GPP_D_0_MASK	0x1
77 #define PAD_CFG_DW0_GPP_D_0	0x4c0
78 #define GPIROUTNMI		(1ul << 17)
79 #define PCH_PCR_GPIO_1_BASE	0xfdae0000ul
80 #define PCH_PCR_GPIO_ADDRESS(offset) (int *)((u64)(pch_base) | (u64)(offset))
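/*
 * pch_base is set in uv_nmi_setup_hubless() by mapping the fixed
 * PCH_PCR_GPIO_1_BASE physical address; PCH_PCR_GPIO_ADDRESS() then forms
 * a pointer to the 32-bit PCH GPIO register at the given offset within
 * that mapping.
 */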
81 
82 static u64 *pch_base;
83 static unsigned long nmi_mmr;
84 static unsigned long nmi_mmr_clear;
85 static unsigned long nmi_mmr_pending;
86 
87 static atomic_t	uv_in_nmi;
88 static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1);
89 static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1);
90 static atomic_t uv_nmi_slave_continue;
91 static cpumask_var_t uv_nmi_cpu_mask;
92 
93 /* Values for uv_nmi_slave_continue */
94 #define SLAVE_CLEAR	0
95 #define SLAVE_CONTINUE	1
96 #define SLAVE_EXIT	2
97 
98 /*
99  * By default all stack dumps go to the console and the log buffer.
100  * Lower the level to send them to the log buffer only.
101  */
102 static int uv_nmi_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
103 module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644);
104 
105 /*
106  * The following values show statistics on how perf events are affecting
107  * this system.
108  */
109 static int param_get_local64(char *buffer, const struct kernel_param *kp)
110 {
111 	return sprintf(buffer, "%lu\n", local64_read((local64_t *)kp->arg));
112 }
113 
114 static int param_set_local64(const char *val, const struct kernel_param *kp)
115 {
116 	/* Clear on any write */
117 	local64_set((local64_t *)kp->arg, 0);
118 	return 0;
119 }
120 
121 static const struct kernel_param_ops param_ops_local64 = {
122 	.get = param_get_local64,
123 	.set = param_set_local64,
124 };
125 #define param_check_local64(name, p) __param_check(name, p, local64_t)
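/*
 * Defining param_ops_local64/param_check_local64 is what lets the
 * module_param_named() calls below use "local64" as a parameter type:
 * module_param_named() finds these definitions by pasting the type name.
 */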
126 
127 static local64_t uv_nmi_count;
128 module_param_named(nmi_count, uv_nmi_count, local64, 0644);
129 
130 static local64_t uv_nmi_misses;
131 module_param_named(nmi_misses, uv_nmi_misses, local64, 0644);
132 
133 static local64_t uv_nmi_ping_count;
134 module_param_named(ping_count, uv_nmi_ping_count, local64, 0644);
135 
136 static local64_t uv_nmi_ping_misses;
137 module_param_named(ping_misses, uv_nmi_ping_misses, local64, 0644);
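/*
 * With the usual module-parameter layout these counters should appear
 * under /sys/module/uv_nmi/parameters/ (nmi_count, nmi_misses,
 * ping_count, ping_misses); per param_set_local64() above, writing any
 * value to one of them clears it.
 */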
138 
139 /*
140  * The following values allow tuning for large systems under heavy load
141  */
142 static int uv_nmi_initial_delay = 100;
143 module_param_named(initial_delay, uv_nmi_initial_delay, int, 0644);
144 
145 static int uv_nmi_slave_delay = 100;
146 module_param_named(slave_delay, uv_nmi_slave_delay, int, 0644);
147 
148 static int uv_nmi_loop_delay = 100;
149 module_param_named(loop_delay, uv_nmi_loop_delay, int, 0644);
150 
151 static int uv_nmi_trigger_delay = 10000;
152 module_param_named(trigger_delay, uv_nmi_trigger_delay, int, 0644);
153 
154 static int uv_nmi_wait_count = 100;
155 module_param_named(wait_count, uv_nmi_wait_count, int, 0644);
156 
157 static int uv_nmi_retry_count = 500;
158 module_param_named(retry_count, uv_nmi_retry_count, int, 0644);
159 
160 static bool uv_pch_intr_enable = true;
161 static bool uv_pch_intr_now_enabled;
162 module_param_named(pch_intr_enable, uv_pch_intr_enable, bool, 0644);
163 
164 static bool uv_pch_init_enable = true;
165 module_param_named(pch_init_enable, uv_pch_init_enable, bool, 0644);
166 
167 static int uv_nmi_debug;
168 module_param_named(debug, uv_nmi_debug, int, 0644);
169 
170 #define nmi_debug(fmt, ...)				\
171 	do {						\
172 		if (uv_nmi_debug)			\
173 			pr_info(fmt, ##__VA_ARGS__);	\
174 	} while (0)
175 
176 /* Valid NMI Actions */
177 #define	ACTION_LEN	16
178 static struct nmi_action {
179 	char	*action;
180 	char	*desc;
181 } valid_acts[] = {
182 	{	"kdump",	"do kernel crash dump"			},
183 	{	"dump",		"dump process stack for each cpu"	},
184 	{	"ips",		"dump Inst Ptr info for each cpu"	},
185 	{	"kdb",		"enter KDB (needs kgdboc= assignment)"	},
186 	{	"kgdb",		"enter KGDB (needs gdb target remote)"	},
187 	{	"health",	"check if CPUs respond to NMI"		},
188 };
189 typedef char action_t[ACTION_LEN];
190 static action_t uv_nmi_action = { "dump" };
191 
192 static int param_get_action(char *buffer, const struct kernel_param *kp)
193 {
194 	return sprintf(buffer, "%s\n", uv_nmi_action);
195 }
196 
197 static int param_set_action(const char *val, const struct kernel_param *kp)
198 {
199 	int i;
200 	int n = ARRAY_SIZE(valid_acts);
201 	char arg[ACTION_LEN], *p;
202 
203 	/* (remove possible '\n') */
204 	strncpy(arg, val, ACTION_LEN - 1);
205 	arg[ACTION_LEN - 1] = '\0';
206 	p = strchr(arg, '\n');
207 	if (p)
208 		*p = '\0';
209 
210 	for (i = 0; i < n; i++)
211 		if (!strcmp(arg, valid_acts[i].action))
212 			break;
213 
214 	if (i < n) {
215 		strcpy(uv_nmi_action, arg);
216 		pr_info("UV: New NMI action:%s\n", uv_nmi_action);
217 		return 0;
218 	}
219 
220 	pr_err("UV: Invalid NMI action:%s, valid actions are:\n", arg);
221 	for (i = 0; i < n; i++)
222 		pr_err("UV: %-8s - %s\n",
223 			valid_acts[i].action, valid_acts[i].desc);
224 	return -EINVAL;
225 }
226 
227 static const struct kernel_param_ops param_ops_action = {
228 	.get = param_get_action,
229 	.set = param_set_action,
230 };
231 #define param_check_action(name, p) __param_check(name, p, action_t)
232 
233 module_param_named(action, uv_nmi_action, action, 0644);
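/*
 * The action can be chosen at boot time or at run time through the usual
 * parameter interfaces, e.g. "uv_nmi.action=kdb" on the kernel command
 * line or the corresponding file under /sys/module/uv_nmi/parameters/
 * (paths assume the standard built-in module-parameter naming).
 */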
234 
235 static inline bool uv_nmi_action_is(const char *action)
236 {
237 	return (strncmp(uv_nmi_action, action, strlen(action)) == 0);
238 }
239 
240 /* Set up which type of NMI support is present in the system */
241 static void uv_nmi_setup_mmrs(void)
242 {
243 	if (uv_read_local_mmr(UVH_NMI_MMRX_SUPPORTED)) {
244 		uv_write_local_mmr(UVH_NMI_MMRX_REQ,
245 					1UL << UVH_NMI_MMRX_REQ_SHIFT);
246 		nmi_mmr = UVH_NMI_MMRX;
247 		nmi_mmr_clear = UVH_NMI_MMRX_CLEAR;
248 		nmi_mmr_pending = 1UL << UVH_NMI_MMRX_SHIFT;
249 		pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMRX_TYPE);
250 	} else {
251 		nmi_mmr = UVH_NMI_MMR;
252 		nmi_mmr_clear = UVH_NMI_MMR_CLEAR;
253 		nmi_mmr_pending = 1UL << UVH_NMI_MMR_SHIFT;
254 		pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMR_TYPE);
255 	}
256 }
257 
258 /* Read NMI MMR and check if NMI flag was set by BMC. */
259 static inline int uv_nmi_test_mmr(struct uv_hub_nmi_s *hub_nmi)
260 {
261 	hub_nmi->nmi_value = uv_read_local_mmr(nmi_mmr);
262 	atomic_inc(&hub_nmi->read_mmr_count);
263 	return !!(hub_nmi->nmi_value & nmi_mmr_pending);
264 }
265 
266 static inline void uv_local_mmr_clear_nmi(void)
267 {
268 	uv_write_local_mmr(nmi_mmr_clear, nmi_mmr_pending);
269 }
270 
271 /*
272  * UV hubless NMI handler functions
273  */
274 static inline void uv_reassert_nmi(void)
275 {
276 	/* (from arch/x86/include/asm/mach_traps.h) */
277 	outb(0x8f, NMI_CONTROL_PORT);
278 	inb(NMI_DUMMY_PORT);		/* dummy read */
279 	outb(0x0f, NMI_CONTROL_PORT);
280 	inb(NMI_DUMMY_PORT);		/* dummy read */
281 }
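/*
 * The sequence above toggles bit 7 of the value written to the legacy
 * RTC/CMOS index port (0x70), which on PC-compatible systems masks
 * (0x8f) and then re-enables (0x0f) NMI delivery, reasserting a pending
 * NMI; the dummy reads of port 0x71 appear to be there only to complete
 * each access.
 */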
282 
283 static void uv_init_hubless_pch_io(int offset, int mask, int data)
284 {
285 	int *addr = PCH_PCR_GPIO_ADDRESS(offset);
286 	int readd = readl(addr);
287 
288 	if (mask) {			/* OR in new data */
289 		int writed = (readd & ~mask) | data;
290 
291 		nmi_debug("UV:PCH: %p = %x & %x | %x (%x)\n",
292 			addr, readd, ~mask, data, writed);
293 		writel(writed, addr);
294 	} else if (readd & data) {	/* clear status bit */
295 		nmi_debug("UV:PCH: %p = %x\n", addr, data);
296 		writel(data, addr);
297 	}
298 
299 	(void)readl(addr);		/* flush write data */
300 }
301 
302 static void uv_nmi_setup_hubless_intr(void)
303 {
304 	uv_pch_intr_now_enabled = uv_pch_intr_enable;
305 
306 	uv_init_hubless_pch_io(
307 		PAD_CFG_DW0_GPP_D_0, GPIROUTNMI,
308 		uv_pch_intr_now_enabled ? GPIROUTNMI : 0);
309 
310 	nmi_debug("UV:NMI: GPP_D_0 interrupt %s\n",
311 		uv_pch_intr_now_enabled ? "enabled" : "disabled");
312 }
313 
314 static struct init_nmi {
315 	unsigned int	offset;
316 	unsigned int	mask;
317 	unsigned int	data;
318 } init_nmi[] = {
319 	{	/* HOSTSW_OWN_GPP_D_0 */
320 	.offset = 0x84,
321 	.mask = 0x1,
322 	.data = 0x0,	/* ACPI Mode */
323 	},
324 
325 /* Clear status: */
326 	{	/* GPI_INT_STS_GPP_D_0 */
327 	.offset = 0x104,
328 	.mask = 0x0,
329 	.data = 0x1,	/* Clear Status */
330 	},
331 	{	/* GPI_GPE_STS_GPP_D_0 */
332 	.offset = 0x124,
333 	.mask = 0x0,
334 	.data = 0x1,	/* Clear Status */
335 	},
336 	{	/* GPI_SMI_STS_GPP_D_0 */
337 	.offset = 0x144,
338 	.mask = 0x0,
339 	.data = 0x1,	/* Clear Status */
340 	},
341 	{	/* GPI_NMI_STS_GPP_D_0 */
342 	.offset = 0x164,
343 	.mask = 0x0,
344 	.data = 0x1,	/* Clear Status */
345 	},
346 
347 /* Disable interrupts: */
348 	{	/* GPI_INT_EN_GPP_D_0 */
349 	.offset = 0x114,
350 	.mask = 0x1,
351 	.data = 0x0,	/* Disable interrupt generation */
352 	},
353 	{	/* GPI_GPE_EN_GPP_D_0 */
354 	.offset = 0x134,
355 	.mask = 0x1,
356 	.data = 0x0,	/* Disable interrupt generation */
357 	},
358 	{	/* GPI_SMI_EN_GPP_D_0 */
359 	.offset = 0x154,
360 	.mask = 0x1,
361 	.data = 0x0,	/* Disable interrupt generation */
362 	},
363 	{	/* GPI_NMI_EN_GPP_D_0 */
364 	.offset = 0x174,
365 	.mask = 0x1,
366 	.data = 0x0,	/* Disable interrupt generation */
367 	},
368 
369 /* Setup GPP_D_0 Pad Config: */
370 	{	/* PAD_CFG_DW0_GPP_D_0 */
371 	.offset = 0x4c0,
372 	.mask = 0xffffffff,
373 	.data = 0x82020100,
374 /*
375  *  31:30 Pad Reset Config (PADRSTCFG): = 2h  # PLTRST# (default)
376  *
377  *  29    RX Pad State Select (RXPADSTSEL): = 0 # Raw RX pad state directly
378  *                                                from RX buffer (default)
379  *
380  *  28    RX Raw Override to '1' (RXRAW1): = 0 # No Override
381  *
382  *  26:25 RX Level/Edge Configuration (RXEVCFG):
383  *      = 0h # Level
384  *      = 1h # Edge
385  *
386  *  23    RX Invert (RXINV): = 0 # No Inversion (signal active high)
387  *
388  *  20    GPIO Input Route IOxAPIC (GPIROUTIOXAPIC):
389  * = 0 # Routing does not cause peripheral IRQ...
390  *     # (we want an NMI not an IRQ)
391  *
392  *  19    GPIO Input Route SCI (GPIROUTSCI): = 0 # Routing does not cause SCI.
393  *  18    GPIO Input Route SMI (GPIROUTSMI): = 0 # Routing does not cause SMI.
394  *  17    GPIO Input Route NMI (GPIROUTNMI): = 1 # Routing can cause NMI.
395  *
396  *  11:10 Pad Mode (PMODE1/0): = 0h = GPIO control the Pad.
397  *   9    GPIO RX Disable (GPIORXDIS):
398  * = 0 # Enable the input buffer (active low enable)
399  *
400  *   8    GPIO TX Disable (GPIOTXDIS):
401  * = 1 # Disable the output buffer; i.e. Hi-Z
402  *
403  *   1 GPIO RX State (GPIORXSTATE): This is the current internal RX pad state.
404  *   0 GPIO TX State (GPIOTXSTATE):
405  * = 0 # (Leave at default)
406  */
407 	},
408 
409 /* Pad Config DW1 */
410 	{	/* PAD_CFG_DW1_GPP_D_0 */
411 	.offset = 0x4c4,
412 	.mask = 0x3c00,
413 	.data = 0,	/* Termination = none (default) */
414 	},
415 };
416 
417 static void uv_init_hubless_pch_d0(void)
418 {
419 	int i, read;
420 
421 	read = *PCH_PCR_GPIO_ADDRESS(PAD_OWN_GPP_D_0);
422 	if (read != 0) {
423 		pr_info("UV: Hubless NMI already configured\n");
424 		return;
425 	}
426 
427 	nmi_debug("UV: Initializing UV Hubless NMI on PCH\n");
428 	for (i = 0; i < ARRAY_SIZE(init_nmi); i++) {
429 		uv_init_hubless_pch_io(init_nmi[i].offset,
430 					init_nmi[i].mask,
431 					init_nmi[i].data);
432 	}
433 }
434 
435 static int uv_nmi_test_hubless(struct uv_hub_nmi_s *hub_nmi)
436 {
437 	int *pstat = PCH_PCR_GPIO_ADDRESS(GPI_NMI_STS_GPP_D_0);
438 	int status = *pstat;
439 
440 	hub_nmi->nmi_value = status;
441 	atomic_inc(&hub_nmi->read_mmr_count);
442 
443 	if (!(status & STS_GPP_D_0_MASK))	/* Not a UV external NMI */
444 		return 0;
445 
446 	*pstat = STS_GPP_D_0_MASK;	/* Is a UV NMI: clear GPP_D_0 status */
447 	(void)*pstat;			/* Flush write */
448 
449 	return 1;
450 }
451 
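/*
 * Return value: 1 - a UV NMI is pending, 0 - not a UV NMI, -1 - this CPU
 * cannot check (hubless system and not the PCH owner), so it must wait
 * for the PCH owner to make the call.
 */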
452 static int uv_test_nmi(struct uv_hub_nmi_s *hub_nmi)
453 {
454 	if (hub_nmi->hub_present)
455 		return uv_nmi_test_mmr(hub_nmi);
456 
457 	if (hub_nmi->pch_owner)		/* Only PCH owner can check status */
458 		return uv_nmi_test_hubless(hub_nmi);
459 
460 	return -1;
461 }
462 
463 /*
464  * If this is the first CPU in on this hub, set the hub_nmi "in_nmi" and
465  * "owner" values and return true.  If first in on the system, set global "in_nmi".
466  */
467 static int uv_set_in_nmi(int cpu, struct uv_hub_nmi_s *hub_nmi)
468 {
469 	int first = atomic_add_unless(&hub_nmi->in_nmi, 1, 1);
470 
471 	if (first) {
472 		atomic_set(&hub_nmi->cpu_owner, cpu);
473 		if (atomic_add_unless(&uv_in_nmi, 1, 1))
474 			atomic_set(&uv_nmi_cpu, cpu);
475 
476 		atomic_inc(&hub_nmi->nmi_count);
477 	}
478 	return first;
479 }
480 
481 /* Check if this is a system NMI event */
482 static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi)
483 {
484 	int cpu = smp_processor_id();
485 	int nmi = 0;
486 	int nmi_detected = 0;
487 
488 	local64_inc(&uv_nmi_count);
489 	this_cpu_inc(uv_cpu_nmi.queries);
490 
491 	do {
492 		nmi = atomic_read(&hub_nmi->in_nmi);
493 		if (nmi)
494 			break;
495 
496 		if (raw_spin_trylock(&hub_nmi->nmi_lock)) {
497 			nmi_detected = uv_test_nmi(hub_nmi);
498 
499 			/* Check flag for UV external NMI */
500 			if (nmi_detected > 0) {
501 				uv_set_in_nmi(cpu, hub_nmi);
502 				nmi = 1;
503 				break;
504 			}
505 
506 			/* A non-PCH node in a hubless system waits for NMI */
507 			else if (nmi_detected < 0)
508 				goto slave_wait;
509 
510 			/* MMR/PCH NMI flag is clear */
511 			raw_spin_unlock(&hub_nmi->nmi_lock);
512 
513 		} else {
514 
515 			/* Wait a moment for the hub NMI lock holder to set the flag */
516 slave_wait:		cpu_relax();
517 			udelay(uv_nmi_slave_delay);
518 
519 			/* Re-check hub in_nmi flag */
520 			nmi = atomic_read(&hub_nmi->in_nmi);
521 			if (nmi)
522 				break;
523 		}
524 
525 		/*
526 		 * Check if the BMC missed setting the MMR NMI flag, or if this
527 		 * is a UV hubless system where only the PCH owner can check the flag.
528 		 */
529 		if (!nmi) {
530 			nmi = atomic_read(&uv_in_nmi);
531 			if (nmi)
532 				uv_set_in_nmi(cpu, hub_nmi);
533 		}
534 
535 		/* If we're holding the hub lock, release it now */
536 		if (nmi_detected < 0)
537 			raw_spin_unlock(&hub_nmi->nmi_lock);
538 
539 	} while (0);
540 
541 	if (!nmi)
542 		local64_inc(&uv_nmi_misses);
543 
544 	return nmi;
545 }
546 
547 /* Reset the NMI MMR, but only once per hub. */
548 static inline void uv_clear_nmi(int cpu)
549 {
550 	struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
551 
552 	if (cpu == atomic_read(&hub_nmi->cpu_owner)) {
553 		atomic_set(&hub_nmi->cpu_owner, -1);
554 		atomic_set(&hub_nmi->in_nmi, 0);
555 		if (hub_nmi->hub_present)
556 			uv_local_mmr_clear_nmi();
557 		else
558 			uv_reassert_nmi();
559 		raw_spin_unlock(&hub_nmi->nmi_lock);
560 	}
561 }
562 
563 /* Ping non-responding CPUs, attempting to force them into the NMI handler */
564 static void uv_nmi_nr_cpus_ping(void)
565 {
566 	int cpu;
567 
568 	for_each_cpu(cpu, uv_nmi_cpu_mask)
569 		uv_cpu_nmi_per(cpu).pinging = 1;
570 
571 	apic->send_IPI_mask(uv_nmi_cpu_mask, APIC_DM_NMI);
572 }
573 
574 /* Clean up flags for CPUs that ignored both the NMI and the ping */
575 static void uv_nmi_cleanup_mask(void)
576 {
577 	int cpu;
578 
579 	for_each_cpu(cpu, uv_nmi_cpu_mask) {
580 		uv_cpu_nmi_per(cpu).pinging =  0;
581 		uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_OUT;
582 		cpumask_clear_cpu(cpu, uv_nmi_cpu_mask);
583 	}
584 }
585 
586 /* Loop waiting as CPUs enter the NMI handler; returns the number still missing */
587 static int uv_nmi_wait_cpus(int first)
588 {
589 	int i, j, k, n = num_online_cpus();
590 	int last_k = 0, waiting = 0;
591 	int cpu = smp_processor_id();
592 
593 	if (first) {
594 		cpumask_copy(uv_nmi_cpu_mask, cpu_online_mask);
595 		k = 0;
596 	} else {
597 		k = n - cpumask_weight(uv_nmi_cpu_mask);
598 	}
599 
600 	/* PCH NMI causes only one CPU to respond */
601 	if (first && uv_pch_intr_now_enabled) {
602 		cpumask_clear_cpu(cpu, uv_nmi_cpu_mask);
603 		return n - k - 1;
604 	}
605 
606 	udelay(uv_nmi_initial_delay);
607 	for (i = 0; i < uv_nmi_retry_count; i++) {
608 		int loop_delay = uv_nmi_loop_delay;
609 
610 		for_each_cpu(j, uv_nmi_cpu_mask) {
611 			if (uv_cpu_nmi_per(j).state) {
612 				cpumask_clear_cpu(j, uv_nmi_cpu_mask);
613 				if (++k >= n)
614 					break;
615 			}
616 		}
617 		if (k >= n) {		/* all in? */
618 			k = n;
619 			break;
620 		}
621 		if (last_k != k) {	/* abort if no new CPUs are coming in */
622 			last_k = k;
623 			waiting = 0;
624 		} else if (++waiting > uv_nmi_wait_count)
625 			break;
626 
627 		/* Extend delay if waiting only for CPU 0: */
628 		if (waiting && (n - k) == 1 &&
629 		    cpumask_test_cpu(0, uv_nmi_cpu_mask))
630 			loop_delay *= 100;
631 
632 		udelay(loop_delay);
633 	}
634 	atomic_set(&uv_nmi_cpus_in_nmi, k);
635 	return n - k;
636 }
637 
638 /* Wait until all slave CPUs have entered the UV NMI handler */
639 static void uv_nmi_wait(int master)
640 {
641 	/* Indicate this CPU is in: */
642 	this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_IN);
643 
644 	/* If not the first CPU in (the master), then we are a slave CPU */
645 	if (!master)
646 		return;
647 
648 	do {
649 		/* Wait for all other CPUs to gather here */
650 		if (!uv_nmi_wait_cpus(1))
651 			break;
652 
653 		/* If not all made it in, send IPI NMI to them */
654 		pr_alert("UV: Sending NMI IPI to %d CPUs: %*pbl\n",
655 			 cpumask_weight(uv_nmi_cpu_mask),
656 			 cpumask_pr_args(uv_nmi_cpu_mask));
657 
658 		uv_nmi_nr_cpus_ping();
659 
660 		/* If all CPUs are in, then we are done */
661 		if (!uv_nmi_wait_cpus(0))
662 			break;
663 
664 		pr_alert("UV: %d CPUs not in NMI loop: %*pbl\n",
665 			 cpumask_weight(uv_nmi_cpu_mask),
666 			 cpumask_pr_args(uv_nmi_cpu_mask));
667 	} while (0);
668 
669 	pr_alert("UV: %d of %d CPUs in NMI\n",
670 		atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus());
671 }
672 
673 /* Dump Instruction Pointer header */
674 static void uv_nmi_dump_cpu_ip_hdr(void)
675 {
676 	pr_info("\nUV: %4s %6s %-32s %s   (Note: PID 0 not listed)\n",
677 		"CPU", "PID", "COMMAND", "IP");
678 }
679 
680 /* Dump Instruction Pointer info */
681 static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
682 {
683 	pr_info("UV: %4d %6d %-32.32s %pS",
684 		cpu, current->pid, current->comm, (void *)regs->ip);
685 }
686 
687 /*
688  * Dump this CPU's state.  If the action was set to "kdump" and crash_kexec
689  * failed, then "dump" is provided as the fallback action.  The "dump" action
690  * also includes the "ips" (instruction pointer) output, whereas the "ips"
691  * action only displays instruction pointers for the non-idle CPUs.
692  * This is an abbreviated form of the "ps" command.
693  */
694 static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs)
695 {
696 	const char *dots = " ................................. ";
697 
698 	if (cpu == 0)
699 		uv_nmi_dump_cpu_ip_hdr();
700 
701 	if (current->pid != 0 || !uv_nmi_action_is("ips"))
702 		uv_nmi_dump_cpu_ip(cpu, regs);
703 
704 	if (uv_nmi_action_is("dump")) {
705 		pr_info("UV:%sNMI process trace for CPU %d\n", dots, cpu);
706 		show_regs(regs);
707 	}
708 
709 	this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE);
710 }
711 
712 /* Trigger a slave CPU to dump its state */
713 static void uv_nmi_trigger_dump(int cpu)
714 {
715 	int retry = uv_nmi_trigger_delay;
716 
717 	if (uv_cpu_nmi_per(cpu).state != UV_NMI_STATE_IN)
718 		return;
719 
720 	uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP;
721 	do {
722 		cpu_relax();
723 		udelay(10);
724 		if (uv_cpu_nmi_per(cpu).state
725 				!= UV_NMI_STATE_DUMP)
726 			return;
727 	} while (--retry > 0);
728 
729 	pr_crit("UV: CPU %d stuck in process dump function\n", cpu);
730 	uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP_DONE;
731 }
732 
733 /* Wait until all CPUs are ready to exit */
734 static void uv_nmi_sync_exit(int master)
735 {
736 	atomic_dec(&uv_nmi_cpus_in_nmi);
737 	if (master) {
738 		while (atomic_read(&uv_nmi_cpus_in_nmi) > 0)
739 			cpu_relax();
740 		atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR);
741 	} else {
742 		while (atomic_read(&uv_nmi_slave_continue))
743 			cpu_relax();
744 	}
745 }
746 
747 /* The current "health" check simply reports which CPUs are responsive */
748 static void uv_nmi_action_health(int cpu, struct pt_regs *regs, int master)
749 {
750 	if (master) {
751 		int in = atomic_read(&uv_nmi_cpus_in_nmi);
752 		int out = num_online_cpus() - in;
753 
754 		pr_alert("UV: NMI CPU health check (non-responding:%d)\n", out);
755 		atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
756 	} else {
757 		while (!atomic_read(&uv_nmi_slave_continue))
758 			cpu_relax();
759 	}
760 	uv_nmi_sync_exit(master);
761 }
762 
763 /* Walk through CPU list and dump state of each */
764 static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
765 {
766 	if (master) {
767 		int tcpu;
768 		int ignored = 0;
769 		int saved_console_loglevel = console_loglevel;
770 
771 		pr_alert("UV: tracing %s for %d CPUs from CPU %d\n",
772 			uv_nmi_action_is("ips") ? "IPs" : "processes",
773 			atomic_read(&uv_nmi_cpus_in_nmi), cpu);
774 
775 		console_loglevel = uv_nmi_loglevel;
776 		atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
777 		for_each_online_cpu(tcpu) {
778 			if (cpumask_test_cpu(tcpu, uv_nmi_cpu_mask))
779 				ignored++;
780 			else if (tcpu == cpu)
781 				uv_nmi_dump_state_cpu(tcpu, regs);
782 			else
783 				uv_nmi_trigger_dump(tcpu);
784 		}
785 		if (ignored)
786 			pr_alert("UV: %d CPUs ignored NMI\n", ignored);
787 
788 		console_loglevel = saved_console_loglevel;
789 		pr_alert("UV: process trace complete\n");
790 	} else {
791 		while (!atomic_read(&uv_nmi_slave_continue))
792 			cpu_relax();
793 		while (this_cpu_read(uv_cpu_nmi.state) != UV_NMI_STATE_DUMP)
794 			cpu_relax();
795 		uv_nmi_dump_state_cpu(cpu, regs);
796 	}
797 	uv_nmi_sync_exit(master);
798 }
799 
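/*
 * CPUs can spend long enough in the NMI handler to trip the soft-lockup,
 * clocksource, RCU stall and NMI watchdogs, so reset them all before
 * resuming normal execution.
 */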
800 static void uv_nmi_touch_watchdogs(void)
801 {
802 	touch_softlockup_watchdog_sync();
803 	clocksource_touch_watchdog();
804 	rcu_cpu_stall_reset();
805 	touch_nmi_watchdog();
806 }
807 
808 static atomic_t uv_nmi_kexec_failed;
809 
810 #if defined(CONFIG_KEXEC_CORE)
811 static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
812 {
813 	/* Call crash to dump system state */
814 	if (master) {
815 		pr_emerg("UV: NMI executing crash_kexec on CPU%d\n", cpu);
816 		crash_kexec(regs);
817 
818 		pr_emerg("UV: crash_kexec unexpectedly returned, ");
819 		atomic_set(&uv_nmi_kexec_failed, 1);
820 		if (!kexec_crash_image) {
821 			pr_cont("crash kernel not loaded\n");
822 			return;
823 		}
824 		pr_cont("kexec busy, stalling cpus while waiting\n");
825 	}
826 
827 	/* If crash_kexec fails the slaves should return, otherwise stall */
828 	while (atomic_read(&uv_nmi_kexec_failed) == 0)
829 		mdelay(10);
830 }
831 
832 #else /* !CONFIG_KEXEC_CORE */
833 static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
834 {
835 	if (master)
836 		pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
837 	atomic_set(&uv_nmi_kexec_failed, 1);
838 }
839 #endif /* !CONFIG_KEXEC_CORE */
840 
841 #ifdef CONFIG_KGDB
842 #ifdef CONFIG_KGDB_KDB
843 static inline int uv_nmi_kdb_reason(void)
844 {
845 	return KDB_REASON_SYSTEM_NMI;
846 }
847 #else /* !CONFIG_KGDB_KDB */
848 static inline int uv_nmi_kdb_reason(void)
849 {
850 	/* Ensure user is expecting to attach gdb remote */
851 	if (uv_nmi_action_is("kgdb"))
852 		return 0;
853 
854 	pr_err("UV: NMI error: KDB is not enabled in this kernel\n");
855 	return -1;
856 }
857 #endif /* CONFIG_KGDB_KDB */
858 
859 /*
860  * Call KGDB/KDB from NMI handler
861  *
862  * Note that if both KGDB and KDB are configured, then the action of 'kgdb' or
863  * 'kdb' has no effect on which is used.  See the KGDB documentation for further
864  * information.
865  */
866 static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
867 {
868 	if (master) {
869 		int reason = uv_nmi_kdb_reason();
870 		int ret;
871 
872 		if (reason < 0)
873 			return;
874 
875 		/* Call KGDB NMI handler as MASTER */
876 		ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, reason,
877 				&uv_nmi_slave_continue);
878 		if (ret) {
879 			pr_alert("KGDB returned error, is kgdboc set?\n");
880 			atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
881 		}
882 	} else {
883 		/* Wait for KGDB signal that it's ready for slaves to enter */
884 		int sig;
885 
886 		do {
887 			cpu_relax();
888 			sig = atomic_read(&uv_nmi_slave_continue);
889 		} while (!sig);
890 
891 		/* Call KGDB as slave */
892 		if (sig == SLAVE_CONTINUE)
893 			kgdb_nmicallback(cpu, regs);
894 	}
895 	uv_nmi_sync_exit(master);
896 }
897 
898 #else /* !CONFIG_KGDB */
899 static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
900 {
901 	pr_err("UV: NMI error: KGDB is not enabled in this kernel\n");
902 }
903 #endif /* !CONFIG_KGDB */
904 
905 /*
906  * UV NMI handler
907  */
908 static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
909 {
910 	struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
911 	int cpu = smp_processor_id();
912 	int master = 0;
913 	unsigned long flags;
914 
915 	local_irq_save(flags);
916 
917 	/* If not a UV System NMI, ignore */
918 	if (!this_cpu_read(uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) {
919 		local_irq_restore(flags);
920 		return NMI_DONE;
921 	}
922 
923 	/* Note whether this is the first CPU (the master) into the NMI handler */
924 	master = (atomic_read(&uv_nmi_cpu) == cpu);
925 
926 	/* If NMI action is "kdump", then attempt to do it */
927 	if (uv_nmi_action_is("kdump")) {
928 		uv_nmi_kdump(cpu, master, regs);
929 
930 		/* Unexpected return, revert action to "dump" */
931 		if (master)
932 			strncpy(uv_nmi_action, "dump", strlen(uv_nmi_action));
933 	}
934 
935 	/* Pause as all CPUs enter the NMI handler */
936 	uv_nmi_wait(master);
937 
938 	/* Process actions other than "kdump": */
939 	if (uv_nmi_action_is("health")) {
940 		uv_nmi_action_health(cpu, regs, master);
941 	} else if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) {
942 		uv_nmi_dump_state(cpu, regs, master);
943 	} else if (uv_nmi_action_is("kdb") || uv_nmi_action_is("kgdb")) {
944 		uv_call_kgdb_kdb(cpu, regs, master);
945 	} else {
946 		if (master)
947 			pr_alert("UV: unknown NMI action: %s\n", uv_nmi_action);
948 		uv_nmi_sync_exit(master);
949 	}
950 
951 	/* Clear per_cpu "in_nmi" flag */
952 	this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_OUT);
953 
954 	/* Clear MMR NMI flag on each hub */
955 	uv_clear_nmi(cpu);
956 
957 	/* Clear global flags */
958 	if (master) {
959 		if (cpumask_weight(uv_nmi_cpu_mask))
960 			uv_nmi_cleanup_mask();
961 		atomic_set(&uv_nmi_cpus_in_nmi, -1);
962 		atomic_set(&uv_nmi_cpu, -1);
963 		atomic_set(&uv_in_nmi, 0);
964 		atomic_set(&uv_nmi_kexec_failed, 0);
965 		atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR);
966 	}
967 
968 	uv_nmi_touch_watchdogs();
969 	local_irq_restore(flags);
970 
971 	return NMI_HANDLED;
972 }
973 
974 /*
975  * NMI handler for pulling in CPUs when perf events are grabbing our NMI
976  */
977 static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs)
978 {
979 	int ret;
980 
981 	this_cpu_inc(uv_cpu_nmi.queries);
982 	if (!this_cpu_read(uv_cpu_nmi.pinging)) {
983 		local64_inc(&uv_nmi_ping_misses);
984 		return NMI_DONE;
985 	}
986 
987 	this_cpu_inc(uv_cpu_nmi.pings);
988 	local64_inc(&uv_nmi_ping_count);
989 	ret = uv_handle_nmi(reason, regs);
990 	this_cpu_write(uv_cpu_nmi.pinging, 0);
991 	return ret;
992 }
993 
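/*
 * Register the main handler on the NMI_UNKNOWN chain (so perf NMIs are
 * filtered out first, as described at the top of this file) and the
 * lightweight ping handler on NMI_LOCAL so that CPUs forced in with
 * IPI(NMI) are caught.
 */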
994 static void uv_register_nmi_notifier(void)
995 {
996 	if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
997 		pr_warn("UV: NMI handler failed to register\n");
998 
999 	if (register_nmi_handler(NMI_LOCAL, uv_handle_nmi_ping, 0, "uvping"))
1000 		pr_warn("UV: PING NMI handler failed to register\n");
1001 }
1002 
1003 void uv_nmi_init(void)
1004 {
1005 	unsigned int value;
1006 
1007 	/*
1008 	 * Unmask NMI on all CPUs
1009 	 */
1010 	value = apic_read(APIC_LVT1) | APIC_DM_NMI;
1011 	value &= ~APIC_LVT_MASKED;
1012 	apic_write(APIC_LVT1, value);
1013 }
1014 
1015 /* Set up hub NMI info (one shared uv_hub_nmi_s per node) */
1016 static void __init uv_nmi_setup_common(bool hubbed)
1017 {
1018 	int size = sizeof(void *) * (1 << NODES_SHIFT);
1019 	int cpu;
1020 
1021 	uv_hub_nmi_list = kzalloc(size, GFP_KERNEL);
1022 	nmi_debug("UV: NMI hub list @ 0x%p (%d)\n", uv_hub_nmi_list, size);
1023 	BUG_ON(!uv_hub_nmi_list);
1024 	size = sizeof(struct uv_hub_nmi_s);
1025 	for_each_present_cpu(cpu) {
1026 		int nid = cpu_to_node(cpu);
1027 		if (uv_hub_nmi_list[nid] == NULL) {
1028 			uv_hub_nmi_list[nid] = kzalloc_node(size,
1029 							    GFP_KERNEL, nid);
1030 			BUG_ON(!uv_hub_nmi_list[nid]);
1031 			raw_spin_lock_init(&(uv_hub_nmi_list[nid]->nmi_lock));
1032 			atomic_set(&uv_hub_nmi_list[nid]->cpu_owner, -1);
1033 			uv_hub_nmi_list[nid]->hub_present = hubbed;
1034 			uv_hub_nmi_list[nid]->pch_owner = (nid == 0);
1035 		}
1036 		uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid];
1037 	}
1038 	BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL));
1039 }
1040 
1041 /* Setup for UV Hub systems */
1042 void __init uv_nmi_setup(void)
1043 {
1044 	uv_nmi_setup_mmrs();
1045 	uv_nmi_setup_common(true);
1046 	uv_register_nmi_notifier();
1047 	pr_info("UV: Hub NMI enabled\n");
1048 }
1049 
1050 /* Setup for UV Hubless systems */
1051 void __init uv_nmi_setup_hubless(void)
1052 {
1053 	uv_nmi_setup_common(false);
1054 	pch_base = xlate_dev_mem_ptr(PCH_PCR_GPIO_1_BASE);
1055 	nmi_debug("UV: PCH base:%p from 0x%lx, GPP_D_0\n",
1056 		pch_base, PCH_PCR_GPIO_1_BASE);
1057 	if (uv_pch_init_enable)
1058 		uv_init_hubless_pch_d0();
1059 	uv_init_hubless_pch_io(GPI_NMI_ENA_GPP_D_0,
1060 				STS_GPP_D_0_MASK, STS_GPP_D_0_MASK);
1061 	uv_nmi_setup_hubless_intr();
1062 	/* Ensure NMI enabled in Processor Interface Reg: */
1063 	uv_reassert_nmi();
1064 	uv_register_nmi_notifier();
1065 	pr_info("UV: Hubless NMI enabled\n");
1066 }
1067