xref: /openbmc/linux/arch/powerpc/kvm/mpic.c (revision 15e3ae36)
1 /*
2  * OpenPIC emulation
3  *
4  * Copyright (c) 2004 Jocelyn Mayer
5  *               2011 Alexander Graf
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include <linux/slab.h>
27 #include <linux/mutex.h>
28 #include <linux/kvm_host.h>
29 #include <linux/errno.h>
30 #include <linux/fs.h>
31 #include <linux/anon_inodes.h>
32 #include <linux/uaccess.h>
33 #include <asm/mpic.h>
34 #include <asm/kvm_para.h>
35 #include <asm/kvm_ppc.h>
36 #include <kvm/iodev.h>
37 
/*
 * Compile-time limits.  They size the state arrays in struct openpic
 * (dst[], src[], timers[], msi[]).
 */
#define MAX_CPU     32
#define MAX_SRC     256
#define MAX_TMR     4
#define MAX_IPI     4
#define MAX_MSI     8
/* Total number of source slots: regular sources plus IPIs and timers */
#define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
#define VID         0x03	/* MPIC version ID */

/*
 * OpenPIC capability flags, tested against openpic.flags.
 *
 * OPENPIC_FLAG_IDR_CRIT: the IDR carries critical-interrupt and EP bits
 *                        (see write_IRQreg_idr()).
 * OPENPIC_FLAG_ILR:      the per-source ILR register is implemented
 *                        (see read_IRQreg_ilr()/write_IRQreg_ilr()).
 *                        Note that (2 << 0) evaluates to 2, i.e. bit 1.
 */
#define OPENPIC_FLAG_IDR_CRIT     (1 << 0)
#define OPENPIC_FLAG_ILR          (2 << 0)

/*
 * OpenPIC address map: start offset and size of each register region.
 * NOTE(review): presumably offsets are relative to openpic.reg_base --
 * confirm against the MMIO dispatch code.
 */
#define OPENPIC_REG_SIZE             0x40000
#define OPENPIC_GLB_REG_START        0x0
#define OPENPIC_GLB_REG_SIZE         0x10F0
#define OPENPIC_TMR_REG_START        0x10F0
#define OPENPIC_TMR_REG_SIZE         0x220
#define OPENPIC_MSI_REG_START        0x1600
#define OPENPIC_MSI_REG_SIZE         0x200
#define OPENPIC_SUMMARY_REG_START    0x3800
#define OPENPIC_SUMMARY_REG_SIZE     0x800
#define OPENPIC_SRC_REG_START        0x10000
/* Each source occupies 0x20 bytes of register space */
#define OPENPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
#define OPENPIC_CPU_REG_START        0x20000
/* Per-CPU banks are 0x1000 apart; the last bank only needs 0x100 bytes */
#define OPENPIC_CPU_REG_SIZE         (0x100 + ((MAX_CPU - 1) * 0x1000))
64 
/* Per-model parameters of a Freescale MPIC variant. */
struct fsl_mpic_info {
	/* NOTE(review): presumably the number of external sources -- confirm */
	int max_ext;
};
68 
/*
 * Model parameters; the name suggests the Freescale MPIC v2.0
 * (NOTE(review): confirm against the code that selects it).
 */
static struct fsl_mpic_info fsl_mpic_20 = {
	.max_ext = 12,
};
72 
/*
 * Model parameters; the name suggests the Freescale MPIC v4.2
 * (NOTE(review): confirm against the code that selects it).
 */
static struct fsl_mpic_info fsl_mpic_42 = {
	.max_ext = 12,
};
76 
/*
 * Field positions inside the Feature Reporting Register (FRR):
 * number of IRQs minus one, number of CPUs minus one, version ID.
 */
#define FRR_NIRQ_SHIFT    16
#define FRR_NCPU_SHIFT     8
#define FRR_VID_SHIFT      0

#define VID_REVISION_1_2   2
#define VID_REVISION_1_3   3

#define VIR_GENERIC      0x00000000	/* Generic Vendor ID */

/*
 * Global Configuration Register (GCR) bits.  Writing GCR_RESET resets the
 * controller; the GCR_MODE_* values select the operating mode.
 */
#define GCR_RESET        0x80000000
#define GCR_MODE_PASS    0x00000000
#define GCR_MODE_MIXED   0x20000000
#define GCR_MODE_PROXY   0x60000000

#define TBCR_CI           0x80000000	/* count inhibit */
#define TCCR_TOG          0x80000000	/* toggles when decrement to zero */

/* Bit positions inside the Interrupt Destination Register (IDR) */
#define IDR_EP_SHIFT      31
#define IDR_EP_MASK       (1 << IDR_EP_SHIFT)
#define IDR_CI0_SHIFT     30
#define IDR_CI1_SHIFT     29
#define IDR_P1_SHIFT      1
#define IDR_P0_SHIFT      0

/*
 * Interrupt Level Register (ILR): the low byte selects the output an
 * interrupt source is routed to.  The ILR_INTTGT_* values double as
 * indices into irq_dest.outputs_active[], which has NUM_OUTPUTS entries.
 */
#define ILR_INTTGT_MASK   0x000000ff
#define ILR_INTTGT_INT    0x00
#define ILR_INTTGT_CINT   0x01	/* critical */
#define ILR_INTTGT_MCP    0x02	/* machine check */
#define NUM_OUTPUTS       3

/*
 * MSI Index Register (MSIIR), at MSIIR_OFFSET within the MSI region.
 * SRS selects one of the shared MSI registers, IBS the bit to set in it.
 */
#define MSIIR_OFFSET       0x140
#define MSIIR_SRS_SHIFT    29
#define MSIIR_SRS_MASK     (0x7 << MSIIR_SRS_SHIFT)
#define MSIIR_IBS_SHIFT    24
#define MSIIR_IBS_MASK     (0x1f << MSIIR_IBS_SHIFT)
112 
/*
 * Return the MPIC CPU index of the vcpu bound to the current task.
 *
 * Returns -1 when the task has no vcpu attached, or when the kernel is
 * built without both CONFIG_KVM and CONFIG_BOOKE.
 */
static int get_current_cpu(void)
{
	int cpu_id = -1;

#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
	struct kvm_vcpu *running = current->thread.kvm_vcpu;

	if (running)
		cpu_id = running->arch.irq_cpu_id;
#endif

	return cpu_id;
}
123 
/*
 * Forward declarations: these helpers are called by code that appears
 * before their definitions (the global register handlers and
 * openpic_reset()).
 */
static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
				      u32 val, int idx);
static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
				     u32 *ptr, int idx);
static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
				    uint32_t val);
130 
/*
 * Kind of interrupt source.  It decides which IVPR bits are meaningful
 * and whether the source is level- or edge-triggered (see
 * write_IRQreg_ivpr()).
 */
enum irq_type {
	IRQ_TYPE_NORMAL = 0,
	IRQ_TYPE_FSLINT,	/* FSL internal interrupt -- level only */
	IRQ_TYPE_FSLSPECIAL,	/* FSL timer/IPI interrupt, edge, no polarity */
};
136 
/*
 * Set of interrupt numbers, kept as a bitmap, plus a cached summary of
 * its highest-priority member.  The summary is recomputed by IRQ_check().
 */
struct irq_queue {
	/* Round up to the nearest 64 IRQs so that the queue length
	 * won't change when moving between 32 and 64 bit hosts.
	 */
	unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
	int next;	/* highest-priority IRQ in the set, or -1 if empty */
	int priority;	/* priority of 'next', or -1 if empty */
};
145 
/* State of one interrupt source (external, internal, IPI, timer or MSI). */
struct irq_source {
	uint32_t ivpr;		/* IRQ vector/priority register */
	uint32_t idr;		/* IRQ destination register */
	uint32_t destmask;	/* bitmap of CPU destinations */
	int last_cpu;		/* CPU last chosen in distributed delivery mode */
	int output;		/* IRQ level, e.g. ILR_INTTGT_INT */
	int pending;		/* TRUE if IRQ is pending */
	enum irq_type type;
	bool level:1;		/* level-triggered */
	bool nomask:1;	/* critical interrupts ignore mask on some FSL MPICs */
};
157 
/*
 * Bits of the Interrupt Vector/Priority Register (IVPR):
 *   MASK     - source is disabled while set
 *   ACTIVITY - read-only status bit maintained by openpic_update_irq()
 *   MODE     - selects distributed (set) vs. directed (clear) delivery
 *   POLARITY, SENSE - line configuration; SENSE set means level-triggered
 */
#define IVPR_MASK_SHIFT       31
#define IVPR_MASK_MASK        (1 << IVPR_MASK_SHIFT)
#define IVPR_ACTIVITY_SHIFT   30
#define IVPR_ACTIVITY_MASK    (1 << IVPR_ACTIVITY_SHIFT)
#define IVPR_MODE_SHIFT       29
#define IVPR_MODE_MASK        (1 << IVPR_MODE_SHIFT)
#define IVPR_POLARITY_SHIFT   23
#define IVPR_POLARITY_MASK    (1 << IVPR_POLARITY_SHIFT)
#define IVPR_SENSE_SHIFT      22
#define IVPR_SENSE_MASK       (1 << IVPR_SENSE_SHIFT)

/* Priority is a 4-bit field at bits 16-19; the vector width is per-model */
#define IVPR_PRIORITY_MASK     (0xF << 16)
#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)

/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
#define IDR_EP      0x80000000	/* external pin */
#define IDR_CI      0x40000000	/* critical interrupt */
176 
/* Per-CPU interrupt delivery state. */
struct irq_dest {
	/* vcpu bound to this destination; NULL if the CPU does not exist */
	struct kvm_vcpu *vcpu;

	int32_t ctpr;		/* CPU current task priority */
	struct irq_queue raised;	/* IRQs asserted towards this CPU */
	struct irq_queue servicing;	/* IRQs acknowledged, awaiting EOI */

	/* Count of IRQ sources asserting on non-INT outputs */
	uint32_t outputs_active[NUM_OUTPUTS];
};
187 
/* Capacity of the openpic.mmio_regions[] table */
#define MAX_MMIO_REGIONS 10
189 
/* Complete state of one emulated OpenPIC/MPIC interrupt controller. */
struct openpic {
	struct kvm *kvm;	/* owning VM; passed to kvm_notify_acked_irq() */
	struct kvm_device *dev;
	struct kvm_io_device mmio;
	/* Register regions handled by this controller, and how many are set */
	const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS];
	int num_mmio_regions;

	/* NOTE(review): presumably the guest-physical base of the registers */
	gpa_t reg_base;
	/* Taken around controller state accesses, see kvmppc_mpic_set_epr() */
	spinlock_t lock;

	/* Behavior control */
	struct fsl_mpic_info *fsl;
	uint32_t model;
	uint32_t flags;		/* OPENPIC_FLAG_* capability bits */
	uint32_t nb_irqs;	/* reported (minus one) in the FRR */
	uint32_t vid;
	uint32_t vir;		/* Vendor identification register */
	uint32_t vector_mask;	/* valid bits of a vector (IVPR, SPVE) */
	uint32_t tfrr_reset;	/* value loaded into tfrr on reset */
	uint32_t ivpr_reset;	/* value loaded into every IVPR on reset */
	uint32_t idr_reset;	/* value written to every IDR on reset */
	uint32_t brr1;		/* Block Revision Register 1, read-only */
	uint32_t mpic_mode_mask;	/* GCR bits the guest may change */

	/* Global registers */
	uint32_t frr;		/* Feature reporting register */
	uint32_t gcr;		/* Global configuration register  */
	uint32_t pir;		/* Processor initialization register */
	uint32_t spve;		/* Spurious vector register */
	uint32_t tfrr;		/* Timer frequency reporting register */
	/* Source registers */
	struct irq_source src[MAX_IRQ];
	/* Local registers per output pin */
	struct irq_dest dst[MAX_CPU];
	uint32_t nb_cpus;
	/* Timer registers */
	struct {
		uint32_t tccr;	/* Global timer current count register */
		uint32_t tbcr;	/* Global timer base count register */
	} timers[MAX_TMR];
	/* Shared MSI registers */
	struct {
		uint32_t msir;	/* Shared Message Signaled Interrupt Register */
	} msi[MAX_MSI];
	uint32_t max_irq;	/* number of src[] entries in use */
	uint32_t irq_ipi0;	/* src[] index of the first IPI source */
	uint32_t irq_tim0;	/* src[] index of the first timer source */
	uint32_t irq_msi;	/* src[] index of the first MSI source */
};
239 
240 
241 static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
242 			   int output)
243 {
244 	struct kvm_interrupt irq = {
245 		.irq = KVM_INTERRUPT_SET_LEVEL,
246 	};
247 
248 	if (!dst->vcpu) {
249 		pr_debug("%s: destination cpu %d does not exist\n",
250 			 __func__, (int)(dst - &opp->dst[0]));
251 		return;
252 	}
253 
254 	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
255 		output);
256 
257 	if (output != ILR_INTTGT_INT)	/* TODO */
258 		return;
259 
260 	kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
261 }
262 
263 static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
264 			   int output)
265 {
266 	if (!dst->vcpu) {
267 		pr_debug("%s: destination cpu %d does not exist\n",
268 			 __func__, (int)(dst - &opp->dst[0]));
269 		return;
270 	}
271 
272 	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
273 		output);
274 
275 	if (output != ILR_INTTGT_INT)	/* TODO */
276 		return;
277 
278 	kvmppc_core_dequeue_external(dst->vcpu);
279 }
280 
281 static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
282 {
283 	set_bit(n_IRQ, q->queue);
284 }
285 
286 static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
287 {
288 	clear_bit(n_IRQ, q->queue);
289 }
290 
291 static void IRQ_check(struct openpic *opp, struct irq_queue *q)
292 {
293 	int irq = -1;
294 	int next = -1;
295 	int priority = -1;
296 
297 	for (;;) {
298 		irq = find_next_bit(q->queue, opp->max_irq, irq + 1);
299 		if (irq == opp->max_irq)
300 			break;
301 
302 		pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
303 			irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority);
304 
305 		if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) {
306 			next = irq;
307 			priority = IVPR_PRIORITY(opp->src[irq].ivpr);
308 		}
309 	}
310 
311 	q->next = next;
312 	q->priority = priority;
313 }
314 
315 static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
316 {
317 	/* XXX: optimize */
318 	IRQ_check(opp, q);
319 
320 	return q->next;
321 }
322 
323 static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
324 			   bool active, bool was_active)
325 {
326 	struct irq_dest *dst;
327 	struct irq_source *src;
328 	int priority;
329 
330 	dst = &opp->dst[n_CPU];
331 	src = &opp->src[n_IRQ];
332 
333 	pr_debug("%s: IRQ %d active %d was %d\n",
334 		__func__, n_IRQ, active, was_active);
335 
336 	if (src->output != ILR_INTTGT_INT) {
337 		pr_debug("%s: output %d irq %d active %d was %d count %d\n",
338 			__func__, src->output, n_IRQ, active, was_active,
339 			dst->outputs_active[src->output]);
340 
341 		/* On Freescale MPIC, critical interrupts ignore priority,
342 		 * IACK, EOI, etc.  Before MPIC v4.1 they also ignore
343 		 * masking.
344 		 */
345 		if (active) {
346 			if (!was_active &&
347 			    dst->outputs_active[src->output]++ == 0) {
348 				pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
349 					__func__, src->output, n_CPU, n_IRQ);
350 				mpic_irq_raise(opp, dst, src->output);
351 			}
352 		} else {
353 			if (was_active &&
354 			    --dst->outputs_active[src->output] == 0) {
355 				pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
356 					__func__, src->output, n_CPU, n_IRQ);
357 				mpic_irq_lower(opp, dst, src->output);
358 			}
359 		}
360 
361 		return;
362 	}
363 
364 	priority = IVPR_PRIORITY(src->ivpr);
365 
366 	/* Even if the interrupt doesn't have enough priority,
367 	 * it is still raised, in case ctpr is lowered later.
368 	 */
369 	if (active)
370 		IRQ_setbit(&dst->raised, n_IRQ);
371 	else
372 		IRQ_resetbit(&dst->raised, n_IRQ);
373 
374 	IRQ_check(opp, &dst->raised);
375 
376 	if (active && priority <= dst->ctpr) {
377 		pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
378 			__func__, n_IRQ, priority, dst->ctpr, n_CPU);
379 		active = 0;
380 	}
381 
382 	if (active) {
383 		if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
384 		    priority <= dst->servicing.priority) {
385 			pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
386 				__func__, n_IRQ, dst->servicing.next, n_CPU);
387 		} else {
388 			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
389 				__func__, n_CPU, n_IRQ, dst->raised.next);
390 			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
391 		}
392 	} else {
393 		IRQ_get_next(opp, &dst->servicing);
394 		if (dst->raised.priority > dst->ctpr &&
395 		    dst->raised.priority > dst->servicing.priority) {
396 			pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
397 				__func__, n_IRQ, dst->raised.next,
398 				dst->raised.priority, dst->ctpr,
399 				dst->servicing.priority, n_CPU);
400 			/* IRQ line stays asserted */
401 		} else {
402 			pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
403 				__func__, n_IRQ, dst->ctpr,
404 				dst->servicing.priority, n_CPU);
405 			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
406 		}
407 	}
408 }
409 
410 /* update pic state because registers for n_IRQ have changed value */
411 static void openpic_update_irq(struct openpic *opp, int n_IRQ)
412 {
413 	struct irq_source *src;
414 	bool active, was_active;
415 	int i;
416 
417 	src = &opp->src[n_IRQ];
418 	active = src->pending;
419 
420 	if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
421 		/* Interrupt source is disabled */
422 		pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
423 		active = false;
424 	}
425 
426 	was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);
427 
428 	/*
429 	 * We don't have a similar check for already-active because
430 	 * ctpr may have changed and we need to withdraw the interrupt.
431 	 */
432 	if (!active && !was_active) {
433 		pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
434 		return;
435 	}
436 
437 	if (active)
438 		src->ivpr |= IVPR_ACTIVITY_MASK;
439 	else
440 		src->ivpr &= ~IVPR_ACTIVITY_MASK;
441 
442 	if (src->destmask == 0) {
443 		/* No target */
444 		pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
445 		return;
446 	}
447 
448 	if (src->destmask == (1 << src->last_cpu)) {
449 		/* Only one CPU is allowed to receive this IRQ */
450 		IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
451 	} else if (!(src->ivpr & IVPR_MODE_MASK)) {
452 		/* Directed delivery mode */
453 		for (i = 0; i < opp->nb_cpus; i++) {
454 			if (src->destmask & (1 << i)) {
455 				IRQ_local_pipe(opp, i, n_IRQ, active,
456 					       was_active);
457 			}
458 		}
459 	} else {
460 		/* Distributed delivery mode */
461 		for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
462 			if (i == opp->nb_cpus)
463 				i = 0;
464 
465 			if (src->destmask & (1 << i)) {
466 				IRQ_local_pipe(opp, i, n_IRQ, active,
467 					       was_active);
468 				src->last_cpu = i;
469 				break;
470 			}
471 		}
472 	}
473 }
474 
475 static void openpic_set_irq(void *opaque, int n_IRQ, int level)
476 {
477 	struct openpic *opp = opaque;
478 	struct irq_source *src;
479 
480 	if (n_IRQ >= MAX_IRQ) {
481 		WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
482 		return;
483 	}
484 
485 	src = &opp->src[n_IRQ];
486 	pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
487 		n_IRQ, level, src->ivpr);
488 	if (src->level) {
489 		/* level-sensitive irq */
490 		src->pending = level;
491 		openpic_update_irq(opp, n_IRQ);
492 	} else {
493 		/* edge-sensitive irq */
494 		if (level) {
495 			src->pending = 1;
496 			openpic_update_irq(opp, n_IRQ);
497 		}
498 
499 		if (src->output != ILR_INTTGT_INT) {
500 			/* Edge-triggered interrupts shouldn't be used
501 			 * with non-INT delivery, but just in case,
502 			 * try to make it do something sane rather than
503 			 * cause an interrupt storm.  This is close to
504 			 * what you'd probably see happen in real hardware.
505 			 */
506 			src->pending = 0;
507 			openpic_update_irq(opp, n_IRQ);
508 		}
509 	}
510 }
511 
512 static void openpic_reset(struct openpic *opp)
513 {
514 	int i;
515 
516 	opp->gcr = GCR_RESET;
517 	/* Initialise controller registers */
518 	opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
519 	    (opp->vid << FRR_VID_SHIFT);
520 
521 	opp->pir = 0;
522 	opp->spve = -1 & opp->vector_mask;
523 	opp->tfrr = opp->tfrr_reset;
524 	/* Initialise IRQ sources */
525 	for (i = 0; i < opp->max_irq; i++) {
526 		opp->src[i].ivpr = opp->ivpr_reset;
527 
528 		switch (opp->src[i].type) {
529 		case IRQ_TYPE_NORMAL:
530 			opp->src[i].level =
531 			    !!(opp->ivpr_reset & IVPR_SENSE_MASK);
532 			break;
533 
534 		case IRQ_TYPE_FSLINT:
535 			opp->src[i].ivpr |= IVPR_POLARITY_MASK;
536 			break;
537 
538 		case IRQ_TYPE_FSLSPECIAL:
539 			break;
540 		}
541 
542 		write_IRQreg_idr(opp, i, opp->idr_reset);
543 	}
544 	/* Initialise IRQ destinations */
545 	for (i = 0; i < MAX_CPU; i++) {
546 		opp->dst[i].ctpr = 15;
547 		memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
548 		opp->dst[i].raised.next = -1;
549 		memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
550 		opp->dst[i].servicing.next = -1;
551 	}
552 	/* Initialise timers */
553 	for (i = 0; i < MAX_TMR; i++) {
554 		opp->timers[i].tccr = 0;
555 		opp->timers[i].tbcr = TBCR_CI;
556 	}
557 	/* Go out of RESET state */
558 	opp->gcr = 0;
559 }
560 
561 static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
562 {
563 	return opp->src[n_IRQ].idr;
564 }
565 
566 static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
567 {
568 	if (opp->flags & OPENPIC_FLAG_ILR)
569 		return opp->src[n_IRQ].output;
570 
571 	return 0xffffffff;
572 }
573 
574 static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
575 {
576 	return opp->src[n_IRQ].ivpr;
577 }
578 
579 static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
580 				    uint32_t val)
581 {
582 	struct irq_source *src = &opp->src[n_IRQ];
583 	uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
584 	uint32_t crit_mask = 0;
585 	uint32_t mask = normal_mask;
586 	int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
587 	int i;
588 
589 	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
590 		crit_mask = mask << crit_shift;
591 		mask |= crit_mask | IDR_EP;
592 	}
593 
594 	src->idr = val & mask;
595 	pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);
596 
597 	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
598 		if (src->idr & crit_mask) {
599 			if (src->idr & normal_mask) {
600 				pr_debug("%s: IRQ configured for multiple output types, using critical\n",
601 					__func__);
602 			}
603 
604 			src->output = ILR_INTTGT_CINT;
605 			src->nomask = true;
606 			src->destmask = 0;
607 
608 			for (i = 0; i < opp->nb_cpus; i++) {
609 				int n_ci = IDR_CI0_SHIFT - i;
610 
611 				if (src->idr & (1UL << n_ci))
612 					src->destmask |= 1UL << i;
613 			}
614 		} else {
615 			src->output = ILR_INTTGT_INT;
616 			src->nomask = false;
617 			src->destmask = src->idr & normal_mask;
618 		}
619 	} else {
620 		src->destmask = src->idr;
621 	}
622 }
623 
624 static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
625 				    uint32_t val)
626 {
627 	if (opp->flags & OPENPIC_FLAG_ILR) {
628 		struct irq_source *src = &opp->src[n_IRQ];
629 
630 		src->output = val & ILR_INTTGT_MASK;
631 		pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr,
632 			src->output);
633 
634 		/* TODO: on MPIC v4.0 only, set nomask for non-INT */
635 	}
636 }
637 
638 static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
639 				     uint32_t val)
640 {
641 	uint32_t mask;
642 
643 	/* NOTE when implementing newer FSL MPIC models: starting with v4.0,
644 	 * the polarity bit is read-only on internal interrupts.
645 	 */
646 	mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
647 	    IVPR_POLARITY_MASK | opp->vector_mask;
648 
649 	/* ACTIVITY bit is read-only */
650 	opp->src[n_IRQ].ivpr =
651 	    (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
652 
653 	/* For FSL internal interrupts, The sense bit is reserved and zero,
654 	 * and the interrupt is always level-triggered.  Timers and IPIs
655 	 * have no sense or polarity bits, and are edge-triggered.
656 	 */
657 	switch (opp->src[n_IRQ].type) {
658 	case IRQ_TYPE_NORMAL:
659 		opp->src[n_IRQ].level =
660 		    !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
661 		break;
662 
663 	case IRQ_TYPE_FSLINT:
664 		opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
665 		break;
666 
667 	case IRQ_TYPE_FSLSPECIAL:
668 		opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
669 		break;
670 	}
671 
672 	openpic_update_irq(opp, n_IRQ);
673 	pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
674 		opp->src[n_IRQ].ivpr);
675 }
676 
677 static void openpic_gcr_write(struct openpic *opp, uint64_t val)
678 {
679 	if (val & GCR_RESET) {
680 		openpic_reset(opp);
681 		return;
682 	}
683 
684 	opp->gcr &= ~opp->mpic_mode_mask;
685 	opp->gcr |= val & opp->mpic_mode_mask;
686 }
687 
688 static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
689 {
690 	struct openpic *opp = opaque;
691 	int err = 0;
692 
693 	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
694 	if (addr & 0xF)
695 		return 0;
696 
697 	switch (addr) {
698 	case 0x00:	/* Block Revision Register1 (BRR1) is Readonly */
699 		break;
700 	case 0x40:
701 	case 0x50:
702 	case 0x60:
703 	case 0x70:
704 	case 0x80:
705 	case 0x90:
706 	case 0xA0:
707 	case 0xB0:
708 		err = openpic_cpu_write_internal(opp, addr, val,
709 						 get_current_cpu());
710 		break;
711 	case 0x1000:		/* FRR */
712 		break;
713 	case 0x1020:		/* GCR */
714 		openpic_gcr_write(opp, val);
715 		break;
716 	case 0x1080:		/* VIR */
717 		break;
718 	case 0x1090:		/* PIR */
719 		/*
720 		 * This register is used to reset a CPU core --
721 		 * let userspace handle it.
722 		 */
723 		err = -ENXIO;
724 		break;
725 	case 0x10A0:		/* IPI_IVPR */
726 	case 0x10B0:
727 	case 0x10C0:
728 	case 0x10D0: {
729 		int idx;
730 		idx = (addr - 0x10A0) >> 4;
731 		write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
732 		break;
733 	}
734 	case 0x10E0:		/* SPVE */
735 		opp->spve = val & opp->vector_mask;
736 		break;
737 	default:
738 		break;
739 	}
740 
741 	return err;
742 }
743 
744 static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
745 {
746 	struct openpic *opp = opaque;
747 	u32 retval;
748 	int err = 0;
749 
750 	pr_debug("%s: addr %#llx\n", __func__, addr);
751 	retval = 0xFFFFFFFF;
752 	if (addr & 0xF)
753 		goto out;
754 
755 	switch (addr) {
756 	case 0x1000:		/* FRR */
757 		retval = opp->frr;
758 		retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
759 		break;
760 	case 0x1020:		/* GCR */
761 		retval = opp->gcr;
762 		break;
763 	case 0x1080:		/* VIR */
764 		retval = opp->vir;
765 		break;
766 	case 0x1090:		/* PIR */
767 		retval = 0x00000000;
768 		break;
769 	case 0x00:		/* Block Revision Register1 (BRR1) */
770 		retval = opp->brr1;
771 		break;
772 	case 0x40:
773 	case 0x50:
774 	case 0x60:
775 	case 0x70:
776 	case 0x80:
777 	case 0x90:
778 	case 0xA0:
779 	case 0xB0:
780 		err = openpic_cpu_read_internal(opp, addr,
781 			&retval, get_current_cpu());
782 		break;
783 	case 0x10A0:		/* IPI_IVPR */
784 	case 0x10B0:
785 	case 0x10C0:
786 	case 0x10D0:
787 		{
788 			int idx;
789 			idx = (addr - 0x10A0) >> 4;
790 			retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
791 		}
792 		break;
793 	case 0x10E0:		/* SPVE */
794 		retval = opp->spve;
795 		break;
796 	default:
797 		break;
798 	}
799 
800 out:
801 	pr_debug("%s: => 0x%08x\n", __func__, retval);
802 	*ptr = retval;
803 	return err;
804 }
805 
806 static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
807 {
808 	struct openpic *opp = opaque;
809 	int idx;
810 
811 	addr += 0x10f0;
812 
813 	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
814 	if (addr & 0xF)
815 		return 0;
816 
817 	if (addr == 0x10f0) {
818 		/* TFRR */
819 		opp->tfrr = val;
820 		return 0;
821 	}
822 
823 	idx = (addr >> 6) & 0x3;
824 	addr = addr & 0x30;
825 
826 	switch (addr & 0x30) {
827 	case 0x00:		/* TCCR */
828 		break;
829 	case 0x10:		/* TBCR */
830 		if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
831 		    (val & TBCR_CI) == 0 &&
832 		    (opp->timers[idx].tbcr & TBCR_CI) != 0)
833 			opp->timers[idx].tccr &= ~TCCR_TOG;
834 
835 		opp->timers[idx].tbcr = val;
836 		break;
837 	case 0x20:		/* TVPR */
838 		write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
839 		break;
840 	case 0x30:		/* TDR */
841 		write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
842 		break;
843 	}
844 
845 	return 0;
846 }
847 
848 static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
849 {
850 	struct openpic *opp = opaque;
851 	uint32_t retval = -1;
852 	int idx;
853 
854 	pr_debug("%s: addr %#llx\n", __func__, addr);
855 	if (addr & 0xF)
856 		goto out;
857 
858 	idx = (addr >> 6) & 0x3;
859 	if (addr == 0x0) {
860 		/* TFRR */
861 		retval = opp->tfrr;
862 		goto out;
863 	}
864 
865 	switch (addr & 0x30) {
866 	case 0x00:		/* TCCR */
867 		retval = opp->timers[idx].tccr;
868 		break;
869 	case 0x10:		/* TBCR */
870 		retval = opp->timers[idx].tbcr;
871 		break;
872 	case 0x20:		/* TIPV */
873 		retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
874 		break;
875 	case 0x30:		/* TIDE (TIDR) */
876 		retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
877 		break;
878 	}
879 
880 out:
881 	pr_debug("%s: => 0x%08x\n", __func__, retval);
882 	*ptr = retval;
883 	return 0;
884 }
885 
886 static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
887 {
888 	struct openpic *opp = opaque;
889 	int idx;
890 
891 	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
892 
893 	addr = addr & 0xffff;
894 	idx = addr >> 5;
895 
896 	switch (addr & 0x1f) {
897 	case 0x00:
898 		write_IRQreg_ivpr(opp, idx, val);
899 		break;
900 	case 0x10:
901 		write_IRQreg_idr(opp, idx, val);
902 		break;
903 	case 0x18:
904 		write_IRQreg_ilr(opp, idx, val);
905 		break;
906 	}
907 
908 	return 0;
909 }
910 
911 static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
912 {
913 	struct openpic *opp = opaque;
914 	uint32_t retval;
915 	int idx;
916 
917 	pr_debug("%s: addr %#llx\n", __func__, addr);
918 	retval = 0xFFFFFFFF;
919 
920 	addr = addr & 0xffff;
921 	idx = addr >> 5;
922 
923 	switch (addr & 0x1f) {
924 	case 0x00:
925 		retval = read_IRQreg_ivpr(opp, idx);
926 		break;
927 	case 0x10:
928 		retval = read_IRQreg_idr(opp, idx);
929 		break;
930 	case 0x18:
931 		retval = read_IRQreg_ilr(opp, idx);
932 		break;
933 	}
934 
935 	pr_debug("%s: => 0x%08x\n", __func__, retval);
936 	*ptr = retval;
937 	return 0;
938 }
939 
940 static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
941 {
942 	struct openpic *opp = opaque;
943 	int idx = opp->irq_msi;
944 	int srs, ibs;
945 
946 	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
947 	if (addr & 0xF)
948 		return 0;
949 
950 	switch (addr) {
951 	case MSIIR_OFFSET:
952 		srs = val >> MSIIR_SRS_SHIFT;
953 		idx += srs;
954 		ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
955 		opp->msi[srs].msir |= 1 << ibs;
956 		openpic_set_irq(opp, idx, 1);
957 		break;
958 	default:
959 		/* most registers are read-only, thus ignored */
960 		break;
961 	}
962 
963 	return 0;
964 }
965 
966 static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
967 {
968 	struct openpic *opp = opaque;
969 	uint32_t r = 0;
970 	int i, srs;
971 
972 	pr_debug("%s: addr %#llx\n", __func__, addr);
973 	if (addr & 0xF)
974 		return -ENXIO;
975 
976 	srs = addr >> 4;
977 
978 	switch (addr) {
979 	case 0x00:
980 	case 0x10:
981 	case 0x20:
982 	case 0x30:
983 	case 0x40:
984 	case 0x50:
985 	case 0x60:
986 	case 0x70:		/* MSIRs */
987 		r = opp->msi[srs].msir;
988 		/* Clear on read */
989 		opp->msi[srs].msir = 0;
990 		openpic_set_irq(opp, opp->irq_msi + srs, 0);
991 		break;
992 	case 0x120:		/* MSISR */
993 		for (i = 0; i < MAX_MSI; i++)
994 			r |= (opp->msi[i].msir ? 1 : 0) << i;
995 		break;
996 	}
997 
998 	pr_debug("%s: => 0x%08x\n", __func__, r);
999 	*ptr = r;
1000 	return 0;
1001 }
1002 
1003 static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
1004 {
1005 	uint32_t r = 0;
1006 
1007 	pr_debug("%s: addr %#llx\n", __func__, addr);
1008 
1009 	/* TODO: EISR/EIMR */
1010 
1011 	*ptr = r;
1012 	return 0;
1013 }
1014 
1015 static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
1016 {
1017 	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
1018 
1019 	/* TODO: EISR/EIMR */
1020 	return 0;
1021 }
1022 
1023 static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
1024 				      u32 val, int idx)
1025 {
1026 	struct openpic *opp = opaque;
1027 	struct irq_source *src;
1028 	struct irq_dest *dst;
1029 	int s_IRQ, n_IRQ;
1030 
1031 	pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
1032 		addr, val);
1033 
1034 	if (idx < 0)
1035 		return 0;
1036 
1037 	if (addr & 0xF)
1038 		return 0;
1039 
1040 	dst = &opp->dst[idx];
1041 	addr &= 0xFF0;
1042 	switch (addr) {
1043 	case 0x40:		/* IPIDR */
1044 	case 0x50:
1045 	case 0x60:
1046 	case 0x70:
1047 		idx = (addr - 0x40) >> 4;
1048 		/* we use IDE as mask which CPUs to deliver the IPI to still. */
1049 		opp->src[opp->irq_ipi0 + idx].destmask |= val;
1050 		openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
1051 		openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
1052 		break;
1053 	case 0x80:		/* CTPR */
1054 		dst->ctpr = val & 0x0000000F;
1055 
1056 		pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
1057 			__func__, idx, dst->ctpr, dst->raised.priority,
1058 			dst->servicing.priority);
1059 
1060 		if (dst->raised.priority <= dst->ctpr) {
1061 			pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
1062 				__func__, idx);
1063 			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
1064 		} else if (dst->raised.priority > dst->servicing.priority) {
1065 			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
1066 				__func__, idx, dst->raised.next);
1067 			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
1068 		}
1069 
1070 		break;
1071 	case 0x90:		/* WHOAMI */
1072 		/* Read-only register */
1073 		break;
1074 	case 0xA0:		/* IACK */
1075 		/* Read-only register */
1076 		break;
1077 	case 0xB0: {		/* EOI */
1078 		int notify_eoi;
1079 
1080 		pr_debug("EOI\n");
1081 		s_IRQ = IRQ_get_next(opp, &dst->servicing);
1082 
1083 		if (s_IRQ < 0) {
1084 			pr_debug("%s: EOI with no interrupt in service\n",
1085 				__func__);
1086 			break;
1087 		}
1088 
1089 		IRQ_resetbit(&dst->servicing, s_IRQ);
1090 		/* Notify listeners that the IRQ is over */
1091 		notify_eoi = s_IRQ;
1092 		/* Set up next servicing IRQ */
1093 		s_IRQ = IRQ_get_next(opp, &dst->servicing);
1094 		/* Check queued interrupts. */
1095 		n_IRQ = IRQ_get_next(opp, &dst->raised);
1096 		src = &opp->src[n_IRQ];
1097 		if (n_IRQ != -1 &&
1098 		    (s_IRQ == -1 ||
1099 		     IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
1100 			pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
1101 				idx, n_IRQ);
1102 			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
1103 		}
1104 
1105 		spin_unlock(&opp->lock);
1106 		kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
1107 		spin_lock(&opp->lock);
1108 
1109 		break;
1110 	}
1111 	default:
1112 		break;
1113 	}
1114 
1115 	return 0;
1116 }
1117 
1118 static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
1119 {
1120 	struct openpic *opp = opaque;
1121 
1122 	return openpic_cpu_write_internal(opp, addr, val,
1123 					 (addr & 0x1f000) >> 12);
1124 }
1125 
1126 static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
1127 			     int cpu)
1128 {
1129 	struct irq_source *src;
1130 	int retval, irq;
1131 
1132 	pr_debug("Lower OpenPIC INT output\n");
1133 	mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
1134 
1135 	irq = IRQ_get_next(opp, &dst->raised);
1136 	pr_debug("IACK: irq=%d\n", irq);
1137 
1138 	if (irq == -1)
1139 		/* No more interrupt pending */
1140 		return opp->spve;
1141 
1142 	src = &opp->src[irq];
1143 	if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
1144 	    !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
1145 		pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
1146 			__func__, irq, dst->ctpr, src->ivpr);
1147 		openpic_update_irq(opp, irq);
1148 		retval = opp->spve;
1149 	} else {
1150 		/* IRQ enter servicing state */
1151 		IRQ_setbit(&dst->servicing, irq);
1152 		retval = IVPR_VECTOR(opp, src->ivpr);
1153 	}
1154 
1155 	if (!src->level) {
1156 		/* edge-sensitive IRQ */
1157 		src->ivpr &= ~IVPR_ACTIVITY_MASK;
1158 		src->pending = 0;
1159 		IRQ_resetbit(&dst->raised, irq);
1160 	}
1161 
1162 	if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
1163 		src->destmask &= ~(1 << cpu);
1164 		if (src->destmask && !src->level) {
1165 			/* trigger on CPUs that didn't know about it yet */
1166 			openpic_set_irq(opp, irq, 1);
1167 			openpic_set_irq(opp, irq, 0);
1168 			/* if all CPUs knew about it, set active bit again */
1169 			src->ivpr |= IVPR_ACTIVITY_MASK;
1170 		}
1171 	}
1172 
1173 	return retval;
1174 }
1175 
1176 void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
1177 {
1178 	struct openpic *opp = vcpu->arch.mpic;
1179 	int cpu = vcpu->arch.irq_cpu_id;
1180 	unsigned long flags;
1181 
1182 	spin_lock_irqsave(&opp->lock, flags);
1183 
1184 	if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
1185 		kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
1186 
1187 	spin_unlock_irqrestore(&opp->lock, flags);
1188 }
1189 
1190 static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
1191 				     u32 *ptr, int idx)
1192 {
1193 	struct openpic *opp = opaque;
1194 	struct irq_dest *dst;
1195 	uint32_t retval;
1196 
1197 	pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
1198 	retval = 0xFFFFFFFF;
1199 
1200 	if (idx < 0)
1201 		goto out;
1202 
1203 	if (addr & 0xF)
1204 		goto out;
1205 
1206 	dst = &opp->dst[idx];
1207 	addr &= 0xFF0;
1208 	switch (addr) {
1209 	case 0x80:		/* CTPR */
1210 		retval = dst->ctpr;
1211 		break;
1212 	case 0x90:		/* WHOAMI */
1213 		retval = idx;
1214 		break;
1215 	case 0xA0:		/* IACK */
1216 		retval = openpic_iack(opp, dst, idx);
1217 		break;
1218 	case 0xB0:		/* EOI */
1219 		retval = 0;
1220 		break;
1221 	default:
1222 		break;
1223 	}
1224 	pr_debug("%s: => 0x%08x\n", __func__, retval);
1225 
1226 out:
1227 	*ptr = retval;
1228 	return 0;
1229 }
1230 
1231 static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
1232 {
1233 	struct openpic *opp = opaque;
1234 
1235 	return openpic_cpu_read_internal(opp, addr, ptr,
1236 					 (addr & 0x1f000) >> 12);
1237 }
1238 
1239 struct mem_reg {
1240 	int (*read)(void *opaque, gpa_t addr, u32 *ptr);
1241 	int (*write)(void *opaque, gpa_t addr, u32 val);
1242 	gpa_t start_addr;
1243 	int size;
1244 };
1245 
1246 static const struct mem_reg openpic_gbl_mmio = {
1247 	.write = openpic_gbl_write,
1248 	.read = openpic_gbl_read,
1249 	.start_addr = OPENPIC_GLB_REG_START,
1250 	.size = OPENPIC_GLB_REG_SIZE,
1251 };
1252 
1253 static const struct mem_reg openpic_tmr_mmio = {
1254 	.write = openpic_tmr_write,
1255 	.read = openpic_tmr_read,
1256 	.start_addr = OPENPIC_TMR_REG_START,
1257 	.size = OPENPIC_TMR_REG_SIZE,
1258 };
1259 
1260 static const struct mem_reg openpic_cpu_mmio = {
1261 	.write = openpic_cpu_write,
1262 	.read = openpic_cpu_read,
1263 	.start_addr = OPENPIC_CPU_REG_START,
1264 	.size = OPENPIC_CPU_REG_SIZE,
1265 };
1266 
1267 static const struct mem_reg openpic_src_mmio = {
1268 	.write = openpic_src_write,
1269 	.read = openpic_src_read,
1270 	.start_addr = OPENPIC_SRC_REG_START,
1271 	.size = OPENPIC_SRC_REG_SIZE,
1272 };
1273 
1274 static const struct mem_reg openpic_msi_mmio = {
1275 	.read = openpic_msi_read,
1276 	.write = openpic_msi_write,
1277 	.start_addr = OPENPIC_MSI_REG_START,
1278 	.size = OPENPIC_MSI_REG_SIZE,
1279 };
1280 
1281 static const struct mem_reg openpic_summary_mmio = {
1282 	.read = openpic_summary_read,
1283 	.write = openpic_summary_write,
1284 	.start_addr = OPENPIC_SUMMARY_REG_START,
1285 	.size = OPENPIC_SUMMARY_REG_SIZE,
1286 };
1287 
1288 static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr)
1289 {
1290 	if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) {
1291 		WARN(1, "kvm mpic: too many mmio regions\n");
1292 		return;
1293 	}
1294 
1295 	opp->mmio_regions[opp->num_mmio_regions++] = mr;
1296 }
1297 
1298 static void fsl_common_init(struct openpic *opp)
1299 {
1300 	int i;
1301 	int virq = MAX_SRC;
1302 
1303 	add_mmio_region(opp, &openpic_msi_mmio);
1304 	add_mmio_region(opp, &openpic_summary_mmio);
1305 
1306 	opp->vid = VID_REVISION_1_2;
1307 	opp->vir = VIR_GENERIC;
1308 	opp->vector_mask = 0xFFFF;
1309 	opp->tfrr_reset = 0;
1310 	opp->ivpr_reset = IVPR_MASK_MASK;
1311 	opp->idr_reset = 1 << 0;
1312 	opp->max_irq = MAX_IRQ;
1313 
1314 	opp->irq_ipi0 = virq;
1315 	virq += MAX_IPI;
1316 	opp->irq_tim0 = virq;
1317 	virq += MAX_TMR;
1318 
1319 	BUG_ON(virq > MAX_IRQ);
1320 
1321 	opp->irq_msi = 224;
1322 
1323 	for (i = 0; i < opp->fsl->max_ext; i++)
1324 		opp->src[i].level = false;
1325 
1326 	/* Internal interrupts, including message and MSI */
1327 	for (i = 16; i < MAX_SRC; i++) {
1328 		opp->src[i].type = IRQ_TYPE_FSLINT;
1329 		opp->src[i].level = true;
1330 	}
1331 
1332 	/* timers and IPIs */
1333 	for (i = MAX_SRC; i < virq; i++) {
1334 		opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
1335 		opp->src[i].level = false;
1336 	}
1337 }
1338 
1339 static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
1340 {
1341 	int i;
1342 
1343 	for (i = 0; i < opp->num_mmio_regions; i++) {
1344 		const struct mem_reg *mr = opp->mmio_regions[i];
1345 
1346 		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
1347 			continue;
1348 
1349 		return mr->read(opp, addr - mr->start_addr, ptr);
1350 	}
1351 
1352 	return -ENXIO;
1353 }
1354 
1355 static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
1356 {
1357 	int i;
1358 
1359 	for (i = 0; i < opp->num_mmio_regions; i++) {
1360 		const struct mem_reg *mr = opp->mmio_regions[i];
1361 
1362 		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
1363 			continue;
1364 
1365 		return mr->write(opp, addr - mr->start_addr, val);
1366 	}
1367 
1368 	return -ENXIO;
1369 }
1370 
1371 static int kvm_mpic_read(struct kvm_vcpu *vcpu,
1372 			 struct kvm_io_device *this,
1373 			 gpa_t addr, int len, void *ptr)
1374 {
1375 	struct openpic *opp = container_of(this, struct openpic, mmio);
1376 	int ret;
1377 	union {
1378 		u32 val;
1379 		u8 bytes[4];
1380 	} u;
1381 
1382 	if (addr & (len - 1)) {
1383 		pr_debug("%s: bad alignment %llx/%d\n",
1384 			 __func__, addr, len);
1385 		return -EINVAL;
1386 	}
1387 
1388 	spin_lock_irq(&opp->lock);
1389 	ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
1390 	spin_unlock_irq(&opp->lock);
1391 
1392 	/*
1393 	 * Technically only 32-bit accesses are allowed, but be nice to
1394 	 * people dumping registers a byte at a time -- it works in real
1395 	 * hardware (reads only, not writes).
1396 	 */
1397 	if (len == 4) {
1398 		*(u32 *)ptr = u.val;
1399 		pr_debug("%s: addr %llx ret %d len 4 val %x\n",
1400 			 __func__, addr, ret, u.val);
1401 	} else if (len == 1) {
1402 		*(u8 *)ptr = u.bytes[addr & 3];
1403 		pr_debug("%s: addr %llx ret %d len 1 val %x\n",
1404 			 __func__, addr, ret, u.bytes[addr & 3]);
1405 	} else {
1406 		pr_debug("%s: bad length %d\n", __func__, len);
1407 		return -EINVAL;
1408 	}
1409 
1410 	return ret;
1411 }
1412 
1413 static int kvm_mpic_write(struct kvm_vcpu *vcpu,
1414 			  struct kvm_io_device *this,
1415 			  gpa_t addr, int len, const void *ptr)
1416 {
1417 	struct openpic *opp = container_of(this, struct openpic, mmio);
1418 	int ret;
1419 
1420 	if (len != 4) {
1421 		pr_debug("%s: bad length %d\n", __func__, len);
1422 		return -EOPNOTSUPP;
1423 	}
1424 	if (addr & 3) {
1425 		pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
1426 		return -EOPNOTSUPP;
1427 	}
1428 
1429 	spin_lock_irq(&opp->lock);
1430 	ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
1431 				      *(const u32 *)ptr);
1432 	spin_unlock_irq(&opp->lock);
1433 
1434 	pr_debug("%s: addr %llx ret %d val %x\n",
1435 		 __func__, addr, ret, *(const u32 *)ptr);
1436 
1437 	return ret;
1438 }
1439 
1440 static const struct kvm_io_device_ops mpic_mmio_ops = {
1441 	.read = kvm_mpic_read,
1442 	.write = kvm_mpic_write,
1443 };
1444 
1445 static void map_mmio(struct openpic *opp)
1446 {
1447 	kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
1448 
1449 	kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
1450 				opp->reg_base, OPENPIC_REG_SIZE,
1451 				&opp->mmio);
1452 }
1453 
1454 static void unmap_mmio(struct openpic *opp)
1455 {
1456 	kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
1457 }
1458 
1459 static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
1460 {
1461 	u64 base;
1462 
1463 	if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
1464 		return -EFAULT;
1465 
1466 	if (base & 0x3ffff) {
1467 		pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
1468 			 __func__, base);
1469 		return -EINVAL;
1470 	}
1471 
1472 	if (base == opp->reg_base)
1473 		return 0;
1474 
1475 	mutex_lock(&opp->kvm->slots_lock);
1476 
1477 	unmap_mmio(opp);
1478 	opp->reg_base = base;
1479 
1480 	pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
1481 		 __func__, base);
1482 
1483 	if (base == 0)
1484 		goto out;
1485 
1486 	map_mmio(opp);
1487 
1488 out:
1489 	mutex_unlock(&opp->kvm->slots_lock);
1490 	return 0;
1491 }
1492 
1493 #define ATTR_SET		0
1494 #define ATTR_GET		1
1495 
1496 static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
1497 {
1498 	int ret;
1499 
1500 	if (addr & 3)
1501 		return -ENXIO;
1502 
1503 	spin_lock_irq(&opp->lock);
1504 
1505 	if (type == ATTR_SET)
1506 		ret = kvm_mpic_write_internal(opp, addr, *val);
1507 	else
1508 		ret = kvm_mpic_read_internal(opp, addr, val);
1509 
1510 	spin_unlock_irq(&opp->lock);
1511 
1512 	pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
1513 
1514 	return ret;
1515 }
1516 
1517 static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1518 {
1519 	struct openpic *opp = dev->private;
1520 	u32 attr32;
1521 
1522 	switch (attr->group) {
1523 	case KVM_DEV_MPIC_GRP_MISC:
1524 		switch (attr->attr) {
1525 		case KVM_DEV_MPIC_BASE_ADDR:
1526 			return set_base_addr(opp, attr);
1527 		}
1528 
1529 		break;
1530 
1531 	case KVM_DEV_MPIC_GRP_REGISTER:
1532 		if (get_user(attr32, (u32 __user *)(long)attr->addr))
1533 			return -EFAULT;
1534 
1535 		return access_reg(opp, attr->attr, &attr32, ATTR_SET);
1536 
1537 	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1538 		if (attr->attr > MAX_SRC)
1539 			return -EINVAL;
1540 
1541 		if (get_user(attr32, (u32 __user *)(long)attr->addr))
1542 			return -EFAULT;
1543 
1544 		if (attr32 != 0 && attr32 != 1)
1545 			return -EINVAL;
1546 
1547 		spin_lock_irq(&opp->lock);
1548 		openpic_set_irq(opp, attr->attr, attr32);
1549 		spin_unlock_irq(&opp->lock);
1550 		return 0;
1551 	}
1552 
1553 	return -ENXIO;
1554 }
1555 
1556 static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1557 {
1558 	struct openpic *opp = dev->private;
1559 	u64 attr64;
1560 	u32 attr32;
1561 	int ret;
1562 
1563 	switch (attr->group) {
1564 	case KVM_DEV_MPIC_GRP_MISC:
1565 		switch (attr->attr) {
1566 		case KVM_DEV_MPIC_BASE_ADDR:
1567 			mutex_lock(&opp->kvm->slots_lock);
1568 			attr64 = opp->reg_base;
1569 			mutex_unlock(&opp->kvm->slots_lock);
1570 
1571 			if (copy_to_user((u64 __user *)(long)attr->addr,
1572 					 &attr64, sizeof(u64)))
1573 				return -EFAULT;
1574 
1575 			return 0;
1576 		}
1577 
1578 		break;
1579 
1580 	case KVM_DEV_MPIC_GRP_REGISTER:
1581 		ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
1582 		if (ret)
1583 			return ret;
1584 
1585 		if (put_user(attr32, (u32 __user *)(long)attr->addr))
1586 			return -EFAULT;
1587 
1588 		return 0;
1589 
1590 	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1591 		if (attr->attr > MAX_SRC)
1592 			return -EINVAL;
1593 
1594 		spin_lock_irq(&opp->lock);
1595 		attr32 = opp->src[attr->attr].pending;
1596 		spin_unlock_irq(&opp->lock);
1597 
1598 		if (put_user(attr32, (u32 __user *)(long)attr->addr))
1599 			return -EFAULT;
1600 
1601 		return 0;
1602 	}
1603 
1604 	return -ENXIO;
1605 }
1606 
1607 static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1608 {
1609 	switch (attr->group) {
1610 	case KVM_DEV_MPIC_GRP_MISC:
1611 		switch (attr->attr) {
1612 		case KVM_DEV_MPIC_BASE_ADDR:
1613 			return 0;
1614 		}
1615 
1616 		break;
1617 
1618 	case KVM_DEV_MPIC_GRP_REGISTER:
1619 		return 0;
1620 
1621 	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1622 		if (attr->attr > MAX_SRC)
1623 			break;
1624 
1625 		return 0;
1626 	}
1627 
1628 	return -ENXIO;
1629 }
1630 
1631 static void mpic_destroy(struct kvm_device *dev)
1632 {
1633 	struct openpic *opp = dev->private;
1634 
1635 	dev->kvm->arch.mpic = NULL;
1636 	kfree(opp);
1637 	kfree(dev);
1638 }
1639 
1640 static int mpic_set_default_irq_routing(struct openpic *opp)
1641 {
1642 	struct kvm_irq_routing_entry *routing;
1643 
1644 	/* Create a nop default map, so that dereferencing it still works */
1645 	routing = kzalloc((sizeof(*routing)), GFP_KERNEL);
1646 	if (!routing)
1647 		return -ENOMEM;
1648 
1649 	kvm_set_irq_routing(opp->kvm, routing, 0, 0);
1650 
1651 	kfree(routing);
1652 	return 0;
1653 }
1654 
1655 static int mpic_create(struct kvm_device *dev, u32 type)
1656 {
1657 	struct openpic *opp;
1658 	int ret;
1659 
1660 	/* We only support one MPIC at a time for now */
1661 	if (dev->kvm->arch.mpic)
1662 		return -EINVAL;
1663 
1664 	opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
1665 	if (!opp)
1666 		return -ENOMEM;
1667 
1668 	dev->private = opp;
1669 	opp->kvm = dev->kvm;
1670 	opp->dev = dev;
1671 	opp->model = type;
1672 	spin_lock_init(&opp->lock);
1673 
1674 	add_mmio_region(opp, &openpic_gbl_mmio);
1675 	add_mmio_region(opp, &openpic_tmr_mmio);
1676 	add_mmio_region(opp, &openpic_src_mmio);
1677 	add_mmio_region(opp, &openpic_cpu_mmio);
1678 
1679 	switch (opp->model) {
1680 	case KVM_DEV_TYPE_FSL_MPIC_20:
1681 		opp->fsl = &fsl_mpic_20;
1682 		opp->brr1 = 0x00400200;
1683 		opp->flags |= OPENPIC_FLAG_IDR_CRIT;
1684 		opp->nb_irqs = 80;
1685 		opp->mpic_mode_mask = GCR_MODE_MIXED;
1686 
1687 		fsl_common_init(opp);
1688 
1689 		break;
1690 
1691 	case KVM_DEV_TYPE_FSL_MPIC_42:
1692 		opp->fsl = &fsl_mpic_42;
1693 		opp->brr1 = 0x00400402;
1694 		opp->flags |= OPENPIC_FLAG_ILR;
1695 		opp->nb_irqs = 196;
1696 		opp->mpic_mode_mask = GCR_MODE_PROXY;
1697 
1698 		fsl_common_init(opp);
1699 
1700 		break;
1701 
1702 	default:
1703 		ret = -ENODEV;
1704 		goto err;
1705 	}
1706 
1707 	ret = mpic_set_default_irq_routing(opp);
1708 	if (ret)
1709 		goto err;
1710 
1711 	openpic_reset(opp);
1712 
1713 	smp_wmb();
1714 	dev->kvm->arch.mpic = opp;
1715 
1716 	return 0;
1717 
1718 err:
1719 	kfree(opp);
1720 	return ret;
1721 }
1722 
1723 struct kvm_device_ops kvm_mpic_ops = {
1724 	.name = "kvm-mpic",
1725 	.create = mpic_create,
1726 	.destroy = mpic_destroy,
1727 	.set_attr = mpic_set_attr,
1728 	.get_attr = mpic_get_attr,
1729 	.has_attr = mpic_has_attr,
1730 };
1731 
1732 int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
1733 			     u32 cpu)
1734 {
1735 	struct openpic *opp = dev->private;
1736 	int ret = 0;
1737 
1738 	if (dev->ops != &kvm_mpic_ops)
1739 		return -EPERM;
1740 	if (opp->kvm != vcpu->kvm)
1741 		return -EPERM;
1742 	if (cpu < 0 || cpu >= MAX_CPU)
1743 		return -EPERM;
1744 
1745 	spin_lock_irq(&opp->lock);
1746 
1747 	if (opp->dst[cpu].vcpu) {
1748 		ret = -EEXIST;
1749 		goto out;
1750 	}
1751 	if (vcpu->arch.irq_type) {
1752 		ret = -EBUSY;
1753 		goto out;
1754 	}
1755 
1756 	opp->dst[cpu].vcpu = vcpu;
1757 	opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
1758 
1759 	vcpu->arch.mpic = opp;
1760 	vcpu->arch.irq_cpu_id = cpu;
1761 	vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
1762 
1763 	/* This might need to be changed if GCR gets extended */
1764 	if (opp->mpic_mode_mask == GCR_MODE_PROXY)
1765 		vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
1766 
1767 out:
1768 	spin_unlock_irq(&opp->lock);
1769 	return ret;
1770 }
1771 
1772 /*
1773  * This should only happen immediately before the mpic is destroyed,
1774  * so we shouldn't need to worry about anything still trying to
1775  * access the vcpu pointer.
1776  */
1777 void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
1778 {
1779 	BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
1780 
1781 	opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
1782 }
1783 
1784 /*
1785  * Return value:
1786  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
1787  *  = 0   Interrupt was coalesced (previous irq is still pending)
1788  *  > 0   Number of CPUs interrupt was delivered to
1789  */
1790 static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
1791 			struct kvm *kvm, int irq_source_id, int level,
1792 			bool line_status)
1793 {
1794 	u32 irq = e->irqchip.pin;
1795 	struct openpic *opp = kvm->arch.mpic;
1796 	unsigned long flags;
1797 
1798 	spin_lock_irqsave(&opp->lock, flags);
1799 	openpic_set_irq(opp, irq, level);
1800 	spin_unlock_irqrestore(&opp->lock, flags);
1801 
1802 	/* All code paths we care about don't check for the return value */
1803 	return 0;
1804 }
1805 
1806 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
1807 		struct kvm *kvm, int irq_source_id, int level, bool line_status)
1808 {
1809 	struct openpic *opp = kvm->arch.mpic;
1810 	unsigned long flags;
1811 
1812 	spin_lock_irqsave(&opp->lock, flags);
1813 
1814 	/*
1815 	 * XXX We ignore the target address for now, as we only support
1816 	 *     a single MSI bank.
1817 	 */
1818 	openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data);
1819 	spin_unlock_irqrestore(&opp->lock, flags);
1820 
1821 	/* All code paths we care about don't check for the return value */
1822 	return 0;
1823 }
1824 
1825 int kvm_set_routing_entry(struct kvm *kvm,
1826 			  struct kvm_kernel_irq_routing_entry *e,
1827 			  const struct kvm_irq_routing_entry *ue)
1828 {
1829 	int r = -EINVAL;
1830 
1831 	switch (ue->type) {
1832 	case KVM_IRQ_ROUTING_IRQCHIP:
1833 		e->set = mpic_set_irq;
1834 		e->irqchip.irqchip = ue->u.irqchip.irqchip;
1835 		e->irqchip.pin = ue->u.irqchip.pin;
1836 		if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
1837 			goto out;
1838 		break;
1839 	case KVM_IRQ_ROUTING_MSI:
1840 		e->set = kvm_set_msi;
1841 		e->msi.address_lo = ue->u.msi.address_lo;
1842 		e->msi.address_hi = ue->u.msi.address_hi;
1843 		e->msi.data = ue->u.msi.data;
1844 		break;
1845 	default:
1846 		goto out;
1847 	}
1848 
1849 	r = 0;
1850 out:
1851 	return r;
1852 }
1853