xref: /openbmc/linux/arch/csky/kernel/perf_event.c (revision 8fdf9062)
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
3 
4 #include <linux/errno.h>
5 #include <linux/interrupt.h>
6 #include <linux/module.h>
7 #include <linux/of.h>
8 #include <linux/perf_event.h>
9 #include <linux/platform_device.h>
10 
11 #define CSKY_PMU_MAX_EVENTS 32
12 
13 #define HPCR		"<0, 0x0>"	/* PMU Control reg */
14 #define HPCNTENR	"<0, 0x4>"	/* Count Enable reg */
15 
16 static uint64_t (*hw_raw_read_mapping[CSKY_PMU_MAX_EVENTS])(void);
17 static void (*hw_raw_write_mapping[CSKY_PMU_MAX_EVENTS])(uint64_t val);
18 
19 struct csky_pmu_t {
20 	struct pmu	pmu;
21 	uint32_t	hpcr;
22 } csky_pmu;
23 
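/*
 * Accessors for the C-SKY PMU, which sits behind coprocessor
 * instructions: cprgr/cpwgr read and write coprocessor general registers
 * (the 32-bit lo/hi halves of each counter), while cprcr/cpwcr read and
 * write coprocessor control registers such as HPCR and HPCNTENR.  The
 * string argument is pasted verbatim into the instruction as the
 * register selector.
 */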
24 #define cprgr(reg)				\
25 ({						\
26 	unsigned int tmp;			\
27 	asm volatile("cprgr %0, "reg"\n"	\
28 		     : "=r"(tmp)		\
29 		     :				\
30 		     : "memory");		\
31 	tmp;					\
32 })
33 
34 #define cpwgr(reg, val)		\
35 ({				\
36 	asm volatile(		\
37 	"cpwgr %0, "reg"\n"	\
38 	:			\
39 	: "r"(val)		\
40 	: "memory");		\
41 })
42 
43 #define cprcr(reg)				\
44 ({						\
45 	unsigned int tmp;			\
46 	asm volatile("cprcr %0, "reg"\n"	\
47 		     : "=r"(tmp)		\
48 		     :				\
49 		     : "memory");		\
50 	tmp;					\
51 })
52 
53 #define cpwcr(reg, val)		\
54 ({				\
55 	asm volatile(		\
56 	"cpwcr %0, "reg"\n"	\
57 	:			\
58 	: "r"(val)		\
59 	: "memory");		\
60 })
61 
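/*
 * Every counter below is a 64-bit value exposed as a lo/hi pair of
 * 32-bit coprocessor registers.  The read helpers sample hi, then lo,
 * then hi again and retry until both hi samples match, so a carry from
 * lo into hi between the two accesses can never produce a torn value.
 */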
62 /* cycle counter */
63 static uint64_t csky_pmu_read_cc(void)
64 {
65 	uint32_t lo, hi, tmp;
66 	uint64_t result;
67 
68 	do {
69 		tmp = cprgr("<0, 0x3>");
70 		lo  = cprgr("<0, 0x2>");
71 		hi  = cprgr("<0, 0x3>");
72 	} while (hi != tmp);
73 
74 	result = (uint64_t) (hi) << 32;
75 	result |= lo;
76 
77 	return result;
78 }
79 
80 static void csky_pmu_write_cc(uint64_t val)
81 {
82 	cpwgr("<0, 0x2>", (uint32_t)  val);
83 	cpwgr("<0, 0x3>", (uint32_t) (val >> 32));
84 }
85 
86 /* instruction counter */
87 static uint64_t csky_pmu_read_ic(void)
88 {
89 	uint32_t lo, hi, tmp;
90 	uint64_t result;
91 
92 	do {
93 		tmp = cprgr("<0, 0x5>");
94 		lo  = cprgr("<0, 0x4>");
95 		hi  = cprgr("<0, 0x5>");
96 	} while (hi != tmp);
97 
98 	result = (uint64_t) (hi) << 32;
99 	result |= lo;
100 
101 	return result;
102 }
103 
104 static void csky_pmu_write_ic(uint64_t val)
105 {
106 	cpwgr("<0, 0x4>", (uint32_t)  val);
107 	cpwgr("<0, 0x5>", (uint32_t) (val >> 32));
108 }
109 
110 /* l1 icache access counter */
111 static uint64_t csky_pmu_read_icac(void)
112 {
113 	uint32_t lo, hi, tmp;
114 	uint64_t result;
115 
116 	do {
117 		tmp = cprgr("<0, 0x7>");
118 		lo  = cprgr("<0, 0x6>");
119 		hi  = cprgr("<0, 0x7>");
120 	} while (hi != tmp);
121 
122 	result = (uint64_t) (hi) << 32;
123 	result |= lo;
124 
125 	return result;
126 }
127 
128 static void csky_pmu_write_icac(uint64_t val)
129 {
130 	cpwgr("<0, 0x6>", (uint32_t)  val);
131 	cpwgr("<0, 0x7>", (uint32_t) (val >> 32));
132 }
133 
134 /* l1 icache miss counter */
135 static uint64_t csky_pmu_read_icmc(void)
136 {
137 	uint32_t lo, hi, tmp;
138 	uint64_t result;
139 
140 	do {
141 		tmp = cprgr("<0, 0x9>");
142 		lo  = cprgr("<0, 0x8>");
143 		hi  = cprgr("<0, 0x9>");
144 	} while (hi != tmp);
145 
146 	result = (uint64_t) (hi) << 32;
147 	result |= lo;
148 
149 	return result;
150 }
151 
152 static void csky_pmu_write_icmc(uint64_t val)
153 {
154 	cpwgr("<0, 0x8>", (uint32_t)  val);
155 	cpwgr("<0, 0x9>", (uint32_t) (val >> 32));
156 }
157 
158 /* l1 dcache access counter */
159 static uint64_t csky_pmu_read_dcac(void)
160 {
161 	uint32_t lo, hi, tmp;
162 	uint64_t result;
163 
164 	do {
165 		tmp = cprgr("<0, 0xb>");
166 		lo  = cprgr("<0, 0xa>");
167 		hi  = cprgr("<0, 0xb>");
168 	} while (hi != tmp);
169 
170 	result = (uint64_t) (hi) << 32;
171 	result |= lo;
172 
173 	return result;
174 }
175 
176 static void csky_pmu_write_dcac(uint64_t val)
177 {
178 	cpwgr("<0, 0xa>", (uint32_t)  val);
179 	cpwgr("<0, 0xb>", (uint32_t) (val >> 32));
180 }
181 
182 /* l1 dcache miss counter */
183 static uint64_t csky_pmu_read_dcmc(void)
184 {
185 	uint32_t lo, hi, tmp;
186 	uint64_t result;
187 
188 	do {
189 		tmp = cprgr("<0, 0xd>");
190 		lo  = cprgr("<0, 0xc>");
191 		hi  = cprgr("<0, 0xd>");
192 	} while (hi != tmp);
193 
194 	result = (uint64_t) (hi) << 32;
195 	result |= lo;
196 
197 	return result;
198 }
199 
200 static void csky_pmu_write_dcmc(uint64_t val)
201 {
202 	cpwgr("<0, 0xc>", (uint32_t)  val);
203 	cpwgr("<0, 0xd>", (uint32_t) (val >> 32));
204 }
205 
206 /* l2 cache access counter */
207 static uint64_t csky_pmu_read_l2ac(void)
208 {
209 	uint32_t lo, hi, tmp;
210 	uint64_t result;
211 
212 	do {
213 		tmp = cprgr("<0, 0xf>");
214 		lo  = cprgr("<0, 0xe>");
215 		hi  = cprgr("<0, 0xf>");
216 	} while (hi != tmp);
217 
218 	result = (uint64_t) (hi) << 32;
219 	result |= lo;
220 
221 	return result;
222 }
223 
224 static void csky_pmu_write_l2ac(uint64_t val)
225 {
226 	cpwgr("<0, 0xe>", (uint32_t)  val);
227 	cpwgr("<0, 0xf>", (uint32_t) (val >> 32));
228 }
229 
230 /* l2 cache miss counter */
231 static uint64_t csky_pmu_read_l2mc(void)
232 {
233 	uint32_t lo, hi, tmp;
234 	uint64_t result;
235 
236 	do {
237 		tmp = cprgr("<0, 0x11>");
238 		lo  = cprgr("<0, 0x10>");
239 		hi  = cprgr("<0, 0x11>");
240 	} while (hi != tmp);
241 
242 	result = (uint64_t) (hi) << 32;
243 	result |= lo;
244 
245 	return result;
246 }
247 
248 static void csky_pmu_write_l2mc(uint64_t val)
249 {
250 	cpwgr("<0, 0x10>", (uint32_t)  val);
251 	cpwgr("<0, 0x11>", (uint32_t) (val >> 32));
252 }
253 
254 /* I-UTLB miss counter */
255 static uint64_t csky_pmu_read_iutlbmc(void)
256 {
257 	uint32_t lo, hi, tmp;
258 	uint64_t result;
259 
260 	do {
261 		tmp = cprgr("<0, 0x15>");
262 		lo  = cprgr("<0, 0x14>");
263 		hi  = cprgr("<0, 0x15>");
264 	} while (hi != tmp);
265 
266 	result = (uint64_t) (hi) << 32;
267 	result |= lo;
268 
269 	return result;
270 }
271 
272 static void csky_pmu_write_iutlbmc(uint64_t val)
273 {
274 	cpwgr("<0, 0x14>", (uint32_t)  val);
275 	cpwgr("<0, 0x15>", (uint32_t) (val >> 32));
276 }
277 
278 /* D-UTLB miss counter */
279 static uint64_t csky_pmu_read_dutlbmc(void)
280 {
281 	uint32_t lo, hi, tmp;
282 	uint64_t result;
283 
284 	do {
285 		tmp = cprgr("<0, 0x17>");
286 		lo  = cprgr("<0, 0x16>");
287 		hi  = cprgr("<0, 0x17>");
288 	} while (hi != tmp);
289 
290 	result = (uint64_t) (hi) << 32;
291 	result |= lo;
292 
293 	return result;
294 }
295 
296 static void csky_pmu_write_dutlbmc(uint64_t val)
297 {
298 	cpwgr("<0, 0x16>", (uint32_t)  val);
299 	cpwgr("<0, 0x17>", (uint32_t) (val >> 32));
300 }
301 
302 /* JTLB miss counter */
303 static uint64_t csky_pmu_read_jtlbmc(void)
304 {
305 	uint32_t lo, hi, tmp;
306 	uint64_t result;
307 
308 	do {
309 		tmp = cprgr("<0, 0x19>");
310 		lo  = cprgr("<0, 0x18>");
311 		hi  = cprgr("<0, 0x19>");
312 	} while (hi != tmp);
313 
314 	result = (uint64_t) (hi) << 32;
315 	result |= lo;
316 
317 	return result;
318 }
319 
320 static void csky_pmu_write_jtlbmc(uint64_t val)
321 {
322 	cpwgr("<0, 0x18>", (uint32_t)  val);
323 	cpwgr("<0, 0x19>", (uint32_t) (val >> 32));
324 }
325 
326 /* software counter */
327 static uint64_t csky_pmu_read_softc(void)
328 {
329 	uint32_t lo, hi, tmp;
330 	uint64_t result;
331 
332 	do {
333 		tmp = cprgr("<0, 0x1b>");
334 		lo  = cprgr("<0, 0x1a>");
335 		hi  = cprgr("<0, 0x1b>");
336 	} while (hi != tmp);
337 
338 	result = (uint64_t) (hi) << 32;
339 	result |= lo;
340 
341 	return result;
342 }
343 
344 static void csky_pmu_write_softc(uint64_t val)
345 {
346 	cpwgr("<0, 0x1a>", (uint32_t)  val);
347 	cpwgr("<0, 0x1b>", (uint32_t) (val >> 32));
348 }
349 
350 /* conditional branch mispredict counter */
351 static uint64_t csky_pmu_read_cbmc(void)
352 {
353 	uint32_t lo, hi, tmp;
354 	uint64_t result;
355 
356 	do {
357 		tmp = cprgr("<0, 0x1d>");
358 		lo  = cprgr("<0, 0x1c>");
359 		hi  = cprgr("<0, 0x1d>");
360 	} while (hi != tmp);
361 
362 	result = (uint64_t) (hi) << 32;
363 	result |= lo;
364 
365 	return result;
366 }
367 
368 static void csky_pmu_write_cbmc(uint64_t val)
369 {
370 	cpwgr("<0, 0x1c>", (uint32_t)  val);
371 	cpwgr("<0, 0x1d>", (uint32_t) (val >> 32));
372 }
373 
374 /* conditional branch instruction counter */
375 static uint64_t csky_pmu_read_cbic(void)
376 {
377 	uint32_t lo, hi, tmp;
378 	uint64_t result;
379 
380 	do {
381 		tmp = cprgr("<0, 0x1f>");
382 		lo  = cprgr("<0, 0x1e>");
383 		hi  = cprgr("<0, 0x1f>");
384 	} while (hi != tmp);
385 
386 	result = (uint64_t) (hi) << 32;
387 	result |= lo;
388 
389 	return result;
390 }
391 
392 static void csky_pmu_write_cbic(uint64_t val)
393 {
394 	cpwgr("<0, 0x1e>", (uint32_t)  val);
395 	cpwgr("<0, 0x1f>", (uint32_t) (val >> 32));
396 }
397 
398 /* indirect branch mispredict counter */
399 static uint64_t csky_pmu_read_ibmc(void)
400 {
401 	uint32_t lo, hi, tmp;
402 	uint64_t result;
403 
404 	do {
405 		tmp = cprgr("<0, 0x21>");
406 		lo  = cprgr("<0, 0x20>");
407 		hi  = cprgr("<0, 0x21>");
408 	} while (hi != tmp);
409 
410 	result = (uint64_t) (hi) << 32;
411 	result |= lo;
412 
413 	return result;
414 }
415 
416 static void csky_pmu_write_ibmc(uint64_t val)
417 {
418 	cpwgr("<0, 0x20>", (uint32_t)  val);
419 	cpwgr("<0, 0x21>", (uint32_t) (val >> 32));
420 }
421 
422 /* indirect branch instruction counter */
423 static uint64_t csky_pmu_read_ibic(void)
424 {
425 	uint32_t lo, hi, tmp;
426 	uint64_t result;
427 
428 	do {
429 		tmp = cprgr("<0, 0x23>");
430 		lo  = cprgr("<0, 0x22>");
431 		hi  = cprgr("<0, 0x23>");
432 	} while (hi != tmp);
433 
434 	result = (uint64_t) (hi) << 32;
435 	result |= lo;
436 
437 	return result;
438 }
439 
440 static void csky_pmu_write_ibic(uint64_t val)
441 {
442 	cpwgr("<0, 0x22>", (uint32_t)  val);
443 	cpwgr("<0, 0x23>", (uint32_t) (val >> 32));
444 }
445 
446 /* LSU spec fail counter */
447 static uint64_t csky_pmu_read_lsfc(void)
448 {
449 	uint32_t lo, hi, tmp;
450 	uint64_t result;
451 
452 	do {
453 		tmp = cprgr("<0, 0x25>");
454 		lo  = cprgr("<0, 0x24>");
455 		hi  = cprgr("<0, 0x25>");
456 	} while (hi != tmp);
457 
458 	result = (uint64_t) (hi) << 32;
459 	result |= lo;
460 
461 	return result;
462 }
463 
464 static void csky_pmu_write_lsfc(uint64_t val)
465 {
466 	cpwgr("<0, 0x24>", (uint32_t)  val);
467 	cpwgr("<0, 0x25>", (uint32_t) (val >> 32));
468 }
469 
470 /* store instruction counter */
471 static uint64_t csky_pmu_read_sic(void)
472 {
473 	uint32_t lo, hi, tmp;
474 	uint64_t result;
475 
476 	do {
477 		tmp = cprgr("<0, 0x27>");
478 		lo  = cprgr("<0, 0x26>");
479 		hi  = cprgr("<0, 0x27>");
480 	} while (hi != tmp);
481 
482 	result = (uint64_t) (hi) << 32;
483 	result |= lo;
484 
485 	return result;
486 }
487 
488 static void csky_pmu_write_sic(uint64_t val)
489 {
490 	cpwgr("<0, 0x26>", (uint32_t)  val);
491 	cpwgr("<0, 0x27>", (uint32_t) (val >> 32));
492 }
493 
494 /* dcache read access counter */
495 static uint64_t csky_pmu_read_dcrac(void)
496 {
497 	uint32_t lo, hi, tmp;
498 	uint64_t result;
499 
500 	do {
501 		tmp = cprgr("<0, 0x29>");
502 		lo  = cprgr("<0, 0x28>");
503 		hi  = cprgr("<0, 0x29>");
504 	} while (hi != tmp);
505 
506 	result = (uint64_t) (hi) << 32;
507 	result |= lo;
508 
509 	return result;
510 }
511 
512 static void csky_pmu_write_dcrac(uint64_t val)
513 {
514 	cpwgr("<0, 0x28>", (uint32_t)  val);
515 	cpwgr("<0, 0x29>", (uint32_t) (val >> 32));
516 }
517 
518 /* dcache read miss counter */
519 static uint64_t csky_pmu_read_dcrmc(void)
520 {
521 	uint32_t lo, hi, tmp;
522 	uint64_t result;
523 
524 	do {
525 		tmp = cprgr("<0, 0x2b>");
526 		lo  = cprgr("<0, 0x2a>");
527 		hi  = cprgr("<0, 0x2b>");
528 	} while (hi != tmp);
529 
530 	result = (uint64_t) (hi) << 32;
531 	result |= lo;
532 
533 	return result;
534 }
535 
536 static void csky_pmu_write_dcrmc(uint64_t val)
537 {
538 	cpwgr("<0, 0x2a>", (uint32_t)  val);
539 	cpwgr("<0, 0x2b>", (uint32_t) (val >> 32));
540 }
541 
542 /* dcache write access counter */
543 static uint64_t csky_pmu_read_dcwac(void)
544 {
545 	uint32_t lo, hi, tmp;
546 	uint64_t result;
547 
548 	do {
549 		tmp = cprgr("<0, 0x2d>");
550 		lo  = cprgr("<0, 0x2c>");
551 		hi  = cprgr("<0, 0x2d>");
552 	} while (hi != tmp);
553 
554 	result = (uint64_t) (hi) << 32;
555 	result |= lo;
556 
557 	return result;
558 }
559 
560 static void csky_pmu_write_dcwac(uint64_t val)
561 {
562 	cpwgr("<0, 0x2c>", (uint32_t)  val);
563 	cpwgr("<0, 0x2d>", (uint32_t) (val >> 32));
564 }
565 
566 /* dcache write miss counter */
567 static uint64_t csky_pmu_read_dcwmc(void)
568 {
569 	uint32_t lo, hi, tmp;
570 	uint64_t result;
571 
572 	do {
573 		tmp = cprgr("<0, 0x2f>");
574 		lo  = cprgr("<0, 0x2e>");
575 		hi  = cprgr("<0, 0x2f>");
576 	} while (hi != tmp);
577 
578 	result = (uint64_t) (hi) << 32;
579 	result |= lo;
580 
581 	return result;
582 }
583 
584 static void csky_pmu_write_dcwmc(uint64_t val)
585 {
586 	cpwgr("<0, 0x2e>", (uint32_t)  val);
587 	cpwgr("<0, 0x2f>", (uint32_t) (val >> 32));
588 }
589 
590 /* l2cache read access counter */
591 static uint64_t csky_pmu_read_l2rac(void)
592 {
593 	uint32_t lo, hi, tmp;
594 	uint64_t result;
595 
596 	do {
597 		tmp = cprgr("<0, 0x31>");
598 		lo  = cprgr("<0, 0x30>");
599 		hi  = cprgr("<0, 0x31>");
600 	} while (hi != tmp);
601 
602 	result = (uint64_t) (hi) << 32;
603 	result |= lo;
604 
605 	return result;
606 }
607 
608 static void csky_pmu_write_l2rac(uint64_t val)
609 {
610 	cpwgr("<0, 0x30>", (uint32_t)  val);
611 	cpwgr("<0, 0x31>", (uint32_t) (val >> 32));
612 }
613 
614 /* l2cache read miss counter */
615 static uint64_t csky_pmu_read_l2rmc(void)
616 {
617 	uint32_t lo, hi, tmp;
618 	uint64_t result;
619 
620 	do {
621 		tmp = cprgr("<0, 0x33>");
622 		lo  = cprgr("<0, 0x32>");
623 		hi  = cprgr("<0, 0x33>");
624 	} while (hi != tmp);
625 
626 	result = (uint64_t) (hi) << 32;
627 	result |= lo;
628 
629 	return result;
630 }
631 
632 static void csky_pmu_write_l2rmc(uint64_t val)
633 {
634 	cpwgr("<0, 0x32>", (uint32_t)  val);
635 	cpwgr("<0, 0x33>", (uint32_t) (val >> 32));
636 }
637 
638 /* l2cache write access counter */
639 static uint64_t csky_pmu_read_l2wac(void)
640 {
641 	uint32_t lo, hi, tmp;
642 	uint64_t result;
643 
644 	do {
645 		tmp = cprgr("<0, 0x35>");
646 		lo  = cprgr("<0, 0x34>");
647 		hi  = cprgr("<0, 0x35>");
648 	} while (hi != tmp);
649 
650 	result = (uint64_t) (hi) << 32;
651 	result |= lo;
652 
653 	return result;
654 }
655 
656 static void csky_pmu_write_l2wac(uint64_t val)
657 {
658 	cpwgr("<0, 0x34>", (uint32_t)  val);
659 	cpwgr("<0, 0x35>", (uint32_t) (val >> 32));
660 }
661 
662 /* l2cache write miss counter */
663 static uint64_t csky_pmu_read_l2wmc(void)
664 {
665 	uint32_t lo, hi, tmp;
666 	uint64_t result;
667 
668 	do {
669 		tmp = cprgr("<0, 0x37>");
670 		lo  = cprgr("<0, 0x36>");
671 		hi  = cprgr("<0, 0x37>");
672 	} while (hi != tmp);
673 
674 	result = (uint64_t) (hi) << 32;
675 	result |= lo;
676 
677 	return result;
678 }
679 
680 static void csky_pmu_write_l2wmc(uint64_t val)
681 {
682 	cpwgr("<0, 0x36>", (uint32_t)  val);
683 	cpwgr("<0, 0x37>", (uint32_t) (val >> 32));
684 }
685 
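/*
 * The tables below translate generic perf event codes into the raw
 * counter indices used by hw_raw_read_mapping[]/hw_raw_write_mapping[]
 * (wired up in init_hw_perf_events()).  Entries marked unsupported make
 * csky_pmu_event_init() return -ENOENT.
 */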
686 #define HW_OP_UNSUPPORTED	0xffff
687 static const int csky_pmu_hw_map[PERF_COUNT_HW_MAX] = {
688 	[PERF_COUNT_HW_CPU_CYCLES]		= 0x1,
689 	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x2,
690 	[PERF_COUNT_HW_CACHE_REFERENCES]	= HW_OP_UNSUPPORTED,
691 	[PERF_COUNT_HW_CACHE_MISSES]		= HW_OP_UNSUPPORTED,
692 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0xf,
693 	[PERF_COUNT_HW_BRANCH_MISSES]		= 0xe,
694 	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
695 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= HW_OP_UNSUPPORTED,
696 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= HW_OP_UNSUPPORTED,
697 	[PERF_COUNT_HW_REF_CPU_CYCLES]		= HW_OP_UNSUPPORTED,
698 };
699 
700 #define C(_x)			PERF_COUNT_HW_CACHE_##_x
701 #define CACHE_OP_UNSUPPORTED	0xffff
702 static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
703 	[C(L1D)] = {
704 		[C(OP_READ)] = {
705 			[C(RESULT_ACCESS)]	= 0x14,
706 			[C(RESULT_MISS)]	= 0x15,
707 		},
708 		[C(OP_WRITE)] = {
709 			[C(RESULT_ACCESS)]	= 0x16,
710 			[C(RESULT_MISS)]	= 0x17,
711 		},
712 		[C(OP_PREFETCH)] = {
713 			[C(RESULT_ACCESS)]	= 0x5,
714 			[C(RESULT_MISS)]	= 0x6,
715 		},
716 	},
717 	[C(L1I)] = {
718 		[C(OP_READ)] = {
719 			[C(RESULT_ACCESS)]	= 0x3,
720 			[C(RESULT_MISS)]	= 0x4,
721 		},
722 		[C(OP_WRITE)] = {
723 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
724 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
725 		},
726 		[C(OP_PREFETCH)] = {
727 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
728 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
729 		},
730 	},
731 	[C(LL)] = {
732 		[C(OP_READ)] = {
733 			[C(RESULT_ACCESS)]	= 0x18,
734 			[C(RESULT_MISS)]	= 0x19,
735 		},
736 		[C(OP_WRITE)] = {
737 			[C(RESULT_ACCESS)]	= 0x1a,
738 			[C(RESULT_MISS)]	= 0x1b,
739 		},
740 		[C(OP_PREFETCH)] = {
741 			[C(RESULT_ACCESS)]	= 0x7,
742 			[C(RESULT_MISS)]	= 0x8,
743 		},
744 	},
745 	[C(DTLB)] = {
746 		[C(OP_READ)] = {
747 			[C(RESULT_ACCESS)]	= 0x5,
748 			[C(RESULT_MISS)]	= 0xb,
749 		},
750 		[C(OP_WRITE)] = {
751 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
752 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
753 		},
754 		[C(OP_PREFETCH)] = {
755 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
756 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
757 		},
758 	},
759 	[C(ITLB)] = {
760 		[C(OP_READ)] = {
761 			[C(RESULT_ACCESS)]	= 0x3,
762 			[C(RESULT_MISS)]	= 0xa,
763 		},
764 		[C(OP_WRITE)] = {
765 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
766 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
767 		},
768 		[C(OP_PREFETCH)] = {
769 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
770 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
771 		},
772 	},
773 	[C(BPU)] = {
774 		[C(OP_READ)] = {
775 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
776 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
777 		},
778 		[C(OP_WRITE)] = {
779 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
780 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
781 		},
782 		[C(OP_PREFETCH)] = {
783 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
784 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
785 		},
786 	},
787 	[C(NODE)] = {
788 		[C(OP_READ)] = {
789 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
790 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
791 		},
792 		[C(OP_WRITE)] = {
793 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
794 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
795 		},
796 		[C(OP_PREFETCH)] = {
797 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
798 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
799 		},
800 	},
801 };
802 
803 static void csky_perf_event_update(struct perf_event *event,
804 				   struct hw_perf_event *hwc)
805 {
806 	uint64_t prev_raw_count = local64_read(&hwc->prev_count);
807 	uint64_t new_raw_count = hw_raw_read_mapping[hwc->idx]();
808 	int64_t delta = new_raw_count - prev_raw_count;
809 
810 	/*
811 	 * We aren't afraid of hwc->prev_count changing beneath our feet:
812 	 * with no overflow interrupt, this function cannot be re-entered.
813 	 */
814 	local64_set(&hwc->prev_count, new_raw_count);
815 	local64_add(delta, &event->count);
816 	local64_sub(delta, &hwc->period_left);
817 }
818 
819 static void csky_pmu_read(struct perf_event *event)
820 {
821 	csky_perf_event_update(event, &event->hw);
822 }
823 
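/*
 * A PERF_TYPE_HW_CACHE config packs cache type, operation and result
 * into one byte each (type | op << 8 | result << 16).  For example,
 * PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 * (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) is decoded below and mapped
 * to raw counter 0x15, the dcache read miss counter.
 */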
824 static int csky_pmu_cache_event(u64 config)
825 {
826 	unsigned int cache_type, cache_op, cache_result;
827 
828 	cache_type	= (config >>  0) & 0xff;
829 	cache_op	= (config >>  8) & 0xff;
830 	cache_result	= (config >> 16) & 0xff;
831 
832 	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
833 		return -EINVAL;
834 	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
835 		return -EINVAL;
836 	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
837 		return -EINVAL;
838 
839 	return csky_pmu_cache_map[cache_type][cache_op][cache_result];
840 }
841 
842 static int csky_pmu_event_init(struct perf_event *event)
843 {
844 	struct hw_perf_event *hwc = &event->hw;
845 	int ret;
846 
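	/*
	 * Build the HPCR privilege filter for this event: exclude_user
	 * selects BIT(2) only, exclude_kernel selects BIT(3) only,
	 * otherwise both bits are set.  BIT(1) | BIT(0) are always added,
	 * and csky_pmu_enable() writes the result to HPCR.
	 */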
847 	if (event->attr.exclude_user)
848 		csky_pmu.hpcr = BIT(2);
849 	else if (event->attr.exclude_kernel)
850 		csky_pmu.hpcr = BIT(3);
851 	else
852 		csky_pmu.hpcr = BIT(2) | BIT(3);
853 
854 	csky_pmu.hpcr |= BIT(1) | BIT(0);
855 
856 	switch (event->attr.type) {
857 	case PERF_TYPE_HARDWARE:
858 		if (event->attr.config >= PERF_COUNT_HW_MAX)
859 			return -ENOENT;
860 		ret = csky_pmu_hw_map[event->attr.config];
861 		if (ret == HW_OP_UNSUPPORTED)
862 			return -ENOENT;
863 		hwc->idx = ret;
864 		return 0;
865 	case PERF_TYPE_HW_CACHE:
866 		ret = csky_pmu_cache_event(event->attr.config);
867 		if (ret == CACHE_OP_UNSUPPORTED)
868 			return -ENOENT;
869 		hwc->idx = ret;
870 		return 0;
871 	case PERF_TYPE_RAW:
		/* attr.config comes from userspace; keep it within the table */
		if (event->attr.config >= CSKY_PMU_MAX_EVENTS)
			return -ENOENT;
872 		if (hw_raw_read_mapping[event->attr.config] == NULL)
873 			return -ENOENT;
874 		hwc->idx = event->attr.config;
875 		return 0;
876 	default:
877 		return -ENOENT;
878 	}
879 }
880 
881 /* starts all counters */
882 static void csky_pmu_enable(struct pmu *pmu)
883 {
884 	cpwcr(HPCR, csky_pmu.hpcr);
885 }
886 
887 /* stops all counters */
888 static void csky_pmu_disable(struct pmu *pmu)
889 {
890 	cpwcr(HPCR, BIT(1));
891 }
892 
893 static void csky_pmu_start(struct perf_event *event, int flags)
894 {
895 	struct hw_perf_event *hwc = &event->hw;
896 	int idx = hwc->idx;
897 
898 	if (WARN_ON_ONCE(idx == -1))
899 		return;
900 
901 	if (flags & PERF_EF_RELOAD)
902 		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
903 
904 	hwc->state = 0;
905 
906 	cpwcr(HPCNTENR, BIT(idx) | cprcr(HPCNTENR));
907 }
908 
909 static void csky_pmu_stop(struct perf_event *event, int flags)
910 {
911 	struct hw_perf_event *hwc = &event->hw;
912 	int idx = hwc->idx;
913 
914 	if (!(event->hw.state & PERF_HES_STOPPED)) {
915 		cpwcr(HPCNTENR, ~BIT(idx) & cprcr(HPCNTENR));
916 		event->hw.state |= PERF_HES_STOPPED;
917 	}
918 
919 	if ((flags & PERF_EF_UPDATE) &&
920 	    !(event->hw.state & PERF_HES_UPTODATE)) {
921 		csky_perf_event_update(event, &event->hw);
922 		event->hw.state |= PERF_HES_UPTODATE;
923 	}
924 }
925 
926 static void csky_pmu_del(struct perf_event *event, int flags)
927 {
928 	csky_pmu_stop(event, PERF_EF_UPDATE);
929 
930 	perf_event_update_userpage(event);
931 }
932 
933 /* allocate hardware counter and optionally start counting */
934 static int csky_pmu_add(struct perf_event *event, int flags)
935 {
936 	struct hw_perf_event *hwc = &event->hw;
937 
938 	local64_set(&hwc->prev_count, 0);
939 
940 	if (hw_raw_write_mapping[hwc->idx] != NULL)
941 		hw_raw_write_mapping[hwc->idx](0);
942 
943 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
944 	if (flags & PERF_EF_START)
945 		csky_pmu_start(event, PERF_EF_RELOAD);
946 
947 	perf_event_update_userpage(event);
948 
949 	return 0;
950 }
951 
952 int __init init_hw_perf_events(void)
953 {
954 	csky_pmu.pmu = (struct pmu) {
955 		.pmu_enable	= csky_pmu_enable,
956 		.pmu_disable	= csky_pmu_disable,
957 		.event_init	= csky_pmu_event_init,
958 		.add		= csky_pmu_add,
959 		.del		= csky_pmu_del,
960 		.start		= csky_pmu_start,
961 		.stop		= csky_pmu_stop,
962 		.read		= csky_pmu_read,
963 	};
964 
965 	memset((void *)hw_raw_read_mapping, 0,
966 		sizeof(hw_raw_read_mapping));
967 
968 	hw_raw_read_mapping[0x1]  = csky_pmu_read_cc;
969 	hw_raw_read_mapping[0x2]  = csky_pmu_read_ic;
970 	hw_raw_read_mapping[0x3]  = csky_pmu_read_icac;
971 	hw_raw_read_mapping[0x4]  = csky_pmu_read_icmc;
972 	hw_raw_read_mapping[0x5]  = csky_pmu_read_dcac;
973 	hw_raw_read_mapping[0x6]  = csky_pmu_read_dcmc;
974 	hw_raw_read_mapping[0x7]  = csky_pmu_read_l2ac;
975 	hw_raw_read_mapping[0x8]  = csky_pmu_read_l2mc;
976 	hw_raw_read_mapping[0xa]  = csky_pmu_read_iutlbmc;
977 	hw_raw_read_mapping[0xb]  = csky_pmu_read_dutlbmc;
978 	hw_raw_read_mapping[0xc]  = csky_pmu_read_jtlbmc;
979 	hw_raw_read_mapping[0xd]  = csky_pmu_read_softc;
980 	hw_raw_read_mapping[0xe]  = csky_pmu_read_cbmc;
981 	hw_raw_read_mapping[0xf]  = csky_pmu_read_cbic;
982 	hw_raw_read_mapping[0x10] = csky_pmu_read_ibmc;
983 	hw_raw_read_mapping[0x11] = csky_pmu_read_ibic;
984 	hw_raw_read_mapping[0x12] = csky_pmu_read_lsfc;
985 	hw_raw_read_mapping[0x13] = csky_pmu_read_sic;
986 	hw_raw_read_mapping[0x14] = csky_pmu_read_dcrac;
987 	hw_raw_read_mapping[0x15] = csky_pmu_read_dcrmc;
988 	hw_raw_read_mapping[0x16] = csky_pmu_read_dcwac;
989 	hw_raw_read_mapping[0x17] = csky_pmu_read_dcwmc;
990 	hw_raw_read_mapping[0x18] = csky_pmu_read_l2rac;
991 	hw_raw_read_mapping[0x19] = csky_pmu_read_l2rmc;
992 	hw_raw_read_mapping[0x1a] = csky_pmu_read_l2wac;
993 	hw_raw_read_mapping[0x1b] = csky_pmu_read_l2wmc;
994 
995 	memset((void *)hw_raw_write_mapping, 0,
996 		sizeof(hw_raw_write_mapping));
997 
998 	hw_raw_write_mapping[0x1]  = csky_pmu_write_cc;
999 	hw_raw_write_mapping[0x2]  = csky_pmu_write_ic;
1000 	hw_raw_write_mapping[0x3]  = csky_pmu_write_icac;
1001 	hw_raw_write_mapping[0x4]  = csky_pmu_write_icmc;
1002 	hw_raw_write_mapping[0x5]  = csky_pmu_write_dcac;
1003 	hw_raw_write_mapping[0x6]  = csky_pmu_write_dcmc;
1004 	hw_raw_write_mapping[0x7]  = csky_pmu_write_l2ac;
1005 	hw_raw_write_mapping[0x8]  = csky_pmu_write_l2mc;
1006 	hw_raw_write_mapping[0xa]  = csky_pmu_write_iutlbmc;
1007 	hw_raw_write_mapping[0xb]  = csky_pmu_write_dutlbmc;
1008 	hw_raw_write_mapping[0xc]  = csky_pmu_write_jtlbmc;
1009 	hw_raw_write_mapping[0xd]  = csky_pmu_write_softc;
1010 	hw_raw_write_mapping[0xe]  = csky_pmu_write_cbmc;
1011 	hw_raw_write_mapping[0xf]  = csky_pmu_write_cbic;
1012 	hw_raw_write_mapping[0x10] = csky_pmu_write_ibmc;
1013 	hw_raw_write_mapping[0x11] = csky_pmu_write_ibic;
1014 	hw_raw_write_mapping[0x12] = csky_pmu_write_lsfc;
1015 	hw_raw_write_mapping[0x13] = csky_pmu_write_sic;
1016 	hw_raw_write_mapping[0x14] = csky_pmu_write_dcrac;
1017 	hw_raw_write_mapping[0x15] = csky_pmu_write_dcrmc;
1018 	hw_raw_write_mapping[0x16] = csky_pmu_write_dcwac;
1019 	hw_raw_write_mapping[0x17] = csky_pmu_write_dcwmc;
1020 	hw_raw_write_mapping[0x18] = csky_pmu_write_l2rac;
1021 	hw_raw_write_mapping[0x19] = csky_pmu_write_l2rmc;
1022 	hw_raw_write_mapping[0x1a] = csky_pmu_write_l2wac;
1023 	hw_raw_write_mapping[0x1b] = csky_pmu_write_l2wmc;
1024 
1025 	csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1026 
1027 	cpwcr(HPCR, BIT(31) | BIT(30) | BIT(1));
1028 
1029 	return perf_pmu_register(&csky_pmu.pmu, "cpu", PERF_TYPE_RAW);
1030 }
1031 arch_initcall(init_hw_perf_events);
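/*
 * Once registered, the counters are reachable through the usual perf
 * interface, e.g. (assuming a perf tool built for this target):
 *
 *	perf stat -e cycles,instructions,branches,branch-misses ./workload
 *	perf stat -e r3,r4 ./workload	 # raw events 0x3/0x4: icache access/miss
 *
 * Because the PMU is registered with PERF_PMU_CAP_NO_INTERRUPT, counting
 * works but interrupt-driven sampling does not.
 */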
1032