1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  linux/drivers/clocksource/arm_arch_timer.c
4  *
5  *  Copyright (C) 2011 ARM Ltd.
6  *  All Rights Reserved
7  */
8 
9 #define pr_fmt(fmt) 	"arch_timer: " fmt
10 
11 #include <linux/init.h>
12 #include <linux/kernel.h>
13 #include <linux/device.h>
14 #include <linux/smp.h>
15 #include <linux/cpu.h>
16 #include <linux/cpu_pm.h>
17 #include <linux/clockchips.h>
18 #include <linux/clocksource.h>
19 #include <linux/clocksource_ids.h>
20 #include <linux/interrupt.h>
21 #include <linux/of_irq.h>
22 #include <linux/of_address.h>
23 #include <linux/io.h>
24 #include <linux/slab.h>
25 #include <linux/sched/clock.h>
26 #include <linux/sched_clock.h>
27 #include <linux/acpi.h>
28 #include <linux/arm-smccc.h>
29 #include <linux/ptp_kvm.h>
30 
31 #include <asm/arch_timer.h>
32 #include <asm/virt.h>
33 
34 #include <clocksource/arm_arch_timer.h>
35 
36 #define CNTTIDR		0x08
37 #define CNTTIDR_VIRT(n)	(BIT(1) << ((n) * 4))
38 
39 #define CNTACR(n)	(0x40 + ((n) * 4))
40 #define CNTACR_RPCT	BIT(0)
41 #define CNTACR_RVCT	BIT(1)
42 #define CNTACR_RFRQ	BIT(2)
43 #define CNTACR_RVOFF	BIT(3)
44 #define CNTACR_RWVT	BIT(4)
45 #define CNTACR_RWPT	BIT(5)
46 
47 #define CNTPCT_LO	0x00
48 #define CNTVCT_LO	0x08
49 #define CNTFRQ		0x10
50 #define CNTP_CVAL_LO	0x20
51 #define CNTP_CTL	0x2c
52 #define CNTV_CVAL_LO	0x30
53 #define CNTV_CTL	0x3c
54 
55 /*
56  * The minimum amount of time a generic counter is guaranteed to not roll over
57  * (40 years)
58  */
59 #define MIN_ROLLOVER_SECS	(40ULL * 365 * 24 * 3600)
60 
61 static unsigned arch_timers_present __initdata;
62 
63 struct arch_timer {
64 	void __iomem *base;
65 	struct clock_event_device evt;
66 };
67 
68 static struct arch_timer *arch_timer_mem __ro_after_init;
69 
70 #define to_arch_timer(e) container_of(e, struct arch_timer, evt)
71 
72 static u32 arch_timer_rate __ro_after_init;
73 static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI] __ro_after_init;
74 
75 static const char *arch_timer_ppi_names[ARCH_TIMER_MAX_TIMER_PPI] = {
76 	[ARCH_TIMER_PHYS_SECURE_PPI]	= "sec-phys",
77 	[ARCH_TIMER_PHYS_NONSECURE_PPI]	= "phys",
78 	[ARCH_TIMER_VIRT_PPI]		= "virt",
79 	[ARCH_TIMER_HYP_PPI]		= "hyp-phys",
80 	[ARCH_TIMER_HYP_VIRT_PPI]	= "hyp-virt",
81 };
82 
83 static struct clock_event_device __percpu *arch_timer_evt;
84 
85 static enum arch_timer_ppi_nr arch_timer_uses_ppi __ro_after_init = ARCH_TIMER_VIRT_PPI;
86 static bool arch_timer_c3stop __ro_after_init;
87 static bool arch_timer_mem_use_virtual __ro_after_init;
88 static bool arch_counter_suspend_stop __ro_after_init;
89 #ifdef CONFIG_GENERIC_GETTIMEOFDAY
90 static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_ARCHTIMER;
91 #else
92 static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_NONE;
93 #endif /* CONFIG_GENERIC_GETTIMEOFDAY */
94 
95 static cpumask_t evtstrm_available = CPU_MASK_NONE;
96 static bool evtstrm_enable __ro_after_init = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);
97 
98 static int __init early_evtstrm_cfg(char *buf)
99 {
100 	return strtobool(buf, &evtstrm_enable);
101 }
102 early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg);
103 
104 /*
105  * Makes an educated guess at a valid counter width based on the Generic Timer
106  * specification. Of note:
107  *   1) the system counter is at least 56 bits wide
108  *   2) a roll-over time of not less than 40 years
109  *
110  * See 'ARM DDI 0487G.a D11.1.2 ("The system counter")' for more details.
111  */
112 static int arch_counter_get_width(void)
113 {
114 	u64 min_cycles = MIN_ROLLOVER_SECS * arch_timer_rate;
115 
116 	/* guarantee the returned width is within the valid range */
117 	return clamp_val(ilog2(min_cycles - 1) + 1, 56, 64);
118 }
119 
120 /*
121  * Architected system timer support.
122  */
123 
124 static __always_inline
125 void arch_timer_reg_write(int access, enum arch_timer_reg reg, u64 val,
126 			  struct clock_event_device *clk)
127 {
128 	if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
129 		struct arch_timer *timer = to_arch_timer(clk);
130 		switch (reg) {
131 		case ARCH_TIMER_REG_CTRL:
132 			writel_relaxed((u32)val, timer->base + CNTP_CTL);
133 			break;
134 		case ARCH_TIMER_REG_CVAL:
135 			/*
136 			 * Not guaranteed to be atomic, so the timer
137 			 * must be disabled at this point.
138 			 */
139 			writeq_relaxed(val, timer->base + CNTP_CVAL_LO);
140 			break;
141 		default:
142 			BUILD_BUG();
143 		}
144 	} else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
145 		struct arch_timer *timer = to_arch_timer(clk);
146 		switch (reg) {
147 		case ARCH_TIMER_REG_CTRL:
148 			writel_relaxed((u32)val, timer->base + CNTV_CTL);
149 			break;
150 		case ARCH_TIMER_REG_CVAL:
151 			/* Same restriction as above */
152 			writeq_relaxed(val, timer->base + CNTV_CVAL_LO);
153 			break;
154 		default:
155 			BUILD_BUG();
156 		}
157 	} else {
158 		arch_timer_reg_write_cp15(access, reg, val);
159 	}
160 }
161 
162 static __always_inline
163 u32 arch_timer_reg_read(int access, enum arch_timer_reg reg,
164 			struct clock_event_device *clk)
165 {
166 	u32 val;
167 
168 	if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
169 		struct arch_timer *timer = to_arch_timer(clk);
170 		switch (reg) {
171 		case ARCH_TIMER_REG_CTRL:
172 			val = readl_relaxed(timer->base + CNTP_CTL);
173 			break;
174 		default:
175 			BUILD_BUG();
176 		}
177 	} else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
178 		struct arch_timer *timer = to_arch_timer(clk);
179 		switch (reg) {
180 		case ARCH_TIMER_REG_CTRL:
181 			val = readl_relaxed(timer->base + CNTV_CTL);
182 			break;
183 		default:
184 			BUILD_BUG();
185 		}
186 	} else {
187 		val = arch_timer_reg_read_cp15(access, reg);
188 	}
189 
190 	return val;
191 }
192 
193 static notrace u64 arch_counter_get_cntpct_stable(void)
194 {
195 	return __arch_counter_get_cntpct_stable();
196 }
197 
198 static notrace u64 arch_counter_get_cntpct(void)
199 {
200 	return __arch_counter_get_cntpct();
201 }
202 
203 static notrace u64 arch_counter_get_cntvct_stable(void)
204 {
205 	return __arch_counter_get_cntvct_stable();
206 }
207 
208 static notrace u64 arch_counter_get_cntvct(void)
209 {
210 	return __arch_counter_get_cntvct();
211 }
212 
213 /*
214  * Default to cp15 based access because arm64 uses this function for
215  * sched_clock() before DT is probed and the cp15 method is guaranteed
216  * to exist on arm64. arm doesn't use this before DT is probed so even
217  * if we don't have the cp15 accessors we won't have a problem.
218  */
219 u64 (*arch_timer_read_counter)(void) __ro_after_init = arch_counter_get_cntvct;
220 EXPORT_SYMBOL_GPL(arch_timer_read_counter);
221 
222 static u64 arch_counter_read(struct clocksource *cs)
223 {
224 	return arch_timer_read_counter();
225 }
226 
227 static u64 arch_counter_read_cc(const struct cyclecounter *cc)
228 {
229 	return arch_timer_read_counter();
230 }
231 
232 static struct clocksource clocksource_counter = {
233 	.name	= "arch_sys_counter",
234 	.id	= CSID_ARM_ARCH_COUNTER,
235 	.rating	= 400,
236 	.read	= arch_counter_read,
237 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
238 };
239 
240 static struct cyclecounter cyclecounter __ro_after_init = {
241 	.read	= arch_counter_read_cc,
242 };
243 
244 struct ate_acpi_oem_info {
245 	char oem_id[ACPI_OEM_ID_SIZE + 1];
246 	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
247 	u32 oem_revision;
248 };
249 
250 #ifdef CONFIG_FSL_ERRATUM_A008585
251 /*
252  * The number of retries is an arbitrary value well beyond the highest number
253  * of iterations the loop has been observed to take.
254  */
255 #define __fsl_a008585_read_reg(reg) ({			\
256 	u64 _old, _new;					\
257 	int _retries = 200;				\
258 							\
259 	do {						\
260 		_old = read_sysreg(reg);		\
261 		_new = read_sysreg(reg);		\
262 		_retries--;				\
263 	} while (unlikely(_old != _new) && _retries);	\
264 							\
265 	WARN_ON_ONCE(!_retries);			\
266 	_new;						\
267 })
268 
269 static u64 notrace fsl_a008585_read_cntpct_el0(void)
270 {
271 	return __fsl_a008585_read_reg(cntpct_el0);
272 }
273 
274 static u64 notrace fsl_a008585_read_cntvct_el0(void)
275 {
276 	return __fsl_a008585_read_reg(cntvct_el0);
277 }
278 #endif
279 
280 #ifdef CONFIG_HISILICON_ERRATUM_161010101
281 /*
282  * Verify whether the value of the second read is larger than the first by
283  * less than 32 is the only way to confirm the value is correct, so clear the
284  * lower 5 bits to check whether the difference is greater than 32 or not.
285  * Theoretically the erratum should not occur more than twice in succession
286  * when reading the system counter, but it is possible that some interrupts
287  * may lead to more than twice read errors, triggering the warning, so setting
288  * the number of retries far beyond the number of iterations the loop has been
289  * observed to take.
290  */
291 #define __hisi_161010101_read_reg(reg) ({				\
292 	u64 _old, _new;						\
293 	int _retries = 50;					\
294 								\
295 	do {							\
296 		_old = read_sysreg(reg);			\
297 		_new = read_sysreg(reg);			\
298 		_retries--;					\
299 	} while (unlikely((_new - _old) >> 5) && _retries);	\
300 								\
301 	WARN_ON_ONCE(!_retries);				\
302 	_new;							\
303 })
304 
305 static u64 notrace hisi_161010101_read_cntpct_el0(void)
306 {
307 	return __hisi_161010101_read_reg(cntpct_el0);
308 }
309 
310 static u64 notrace hisi_161010101_read_cntvct_el0(void)
311 {
312 	return __hisi_161010101_read_reg(cntvct_el0);
313 }
314 
315 static struct ate_acpi_oem_info hisi_161010101_oem_info[] = {
316 	/*
317 	 * Note that trailing spaces are required to properly match
318 	 * the OEM table information.
319 	 */
320 	{
321 		.oem_id		= "HISI  ",
322 		.oem_table_id	= "HIP05   ",
323 		.oem_revision	= 0,
324 	},
325 	{
326 		.oem_id		= "HISI  ",
327 		.oem_table_id	= "HIP06   ",
328 		.oem_revision	= 0,
329 	},
330 	{
331 		.oem_id		= "HISI  ",
332 		.oem_table_id	= "HIP07   ",
333 		.oem_revision	= 0,
334 	},
335 	{ /* Sentinel indicating the end of the OEM array */ },
336 };
337 #endif
338 
339 #ifdef CONFIG_ARM64_ERRATUM_858921
340 static u64 notrace arm64_858921_read_cntpct_el0(void)
341 {
342 	u64 old, new;
343 
344 	old = read_sysreg(cntpct_el0);
345 	new = read_sysreg(cntpct_el0);
346 	return (((old ^ new) >> 32) & 1) ? old : new;
347 }
348 
349 static u64 notrace arm64_858921_read_cntvct_el0(void)
350 {
351 	u64 old, new;
352 
353 	old = read_sysreg(cntvct_el0);
354 	new = read_sysreg(cntvct_el0);
355 	return (((old ^ new) >> 32) & 1) ? old : new;
356 }
357 #endif
358 
359 #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
360 /*
361  * The low bits of the counter registers are indeterminate while bit 10 or
362  * greater is rolling over. Since the counter value can jump both backward
363  * (7ff -> 000 -> 800) and forward (7ff -> fff -> 800), ignore register values
364  * with all ones or all zeros in the low bits. Bound the loop by the maximum
365  * number of CPU cycles in 3 consecutive 24 MHz counter periods.
366  */
367 #define __sun50i_a64_read_reg(reg) ({					\
368 	u64 _val;							\
369 	int _retries = 150;						\
370 									\
371 	do {								\
372 		_val = read_sysreg(reg);				\
373 		_retries--;						\
374 	} while (((_val + 1) & GENMASK(8, 0)) <= 1 && _retries);	\
375 									\
376 	WARN_ON_ONCE(!_retries);					\
377 	_val;								\
378 })
379 
380 static u64 notrace sun50i_a64_read_cntpct_el0(void)
381 {
382 	return __sun50i_a64_read_reg(cntpct_el0);
383 }
384 
385 static u64 notrace sun50i_a64_read_cntvct_el0(void)
386 {
387 	return __sun50i_a64_read_reg(cntvct_el0);
388 }
389 #endif
390 
391 #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND
392 DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround);
393 EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround);
394 
395 static atomic_t timer_unstable_counter_workaround_in_use = ATOMIC_INIT(0);
396 
397 /*
398  * Force the inlining of this function so that the register accesses
399  * can be themselves correctly inlined.
400  */
401 static __always_inline
402 void erratum_set_next_event_generic(const int access, unsigned long evt,
403 				    struct clock_event_device *clk)
404 {
405 	unsigned long ctrl;
406 	u64 cval;
407 
408 	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
409 	ctrl |= ARCH_TIMER_CTRL_ENABLE;
410 	ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
411 
412 	if (access == ARCH_TIMER_PHYS_ACCESS) {
413 		cval = evt + arch_counter_get_cntpct_stable();
414 		write_sysreg(cval, cntp_cval_el0);
415 	} else {
416 		cval = evt + arch_counter_get_cntvct_stable();
417 		write_sysreg(cval, cntv_cval_el0);
418 	}
419 
420 	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
421 }
422 
423 static __maybe_unused int erratum_set_next_event_virt(unsigned long evt,
424 					    struct clock_event_device *clk)
425 {
426 	erratum_set_next_event_generic(ARCH_TIMER_VIRT_ACCESS, evt, clk);
427 	return 0;
428 }
429 
430 static __maybe_unused int erratum_set_next_event_phys(unsigned long evt,
431 					    struct clock_event_device *clk)
432 {
433 	erratum_set_next_event_generic(ARCH_TIMER_PHYS_ACCESS, evt, clk);
434 	return 0;
435 }
436 
437 static const struct arch_timer_erratum_workaround ool_workarounds[] = {
438 #ifdef CONFIG_FSL_ERRATUM_A008585
439 	{
440 		.match_type = ate_match_dt,
441 		.id = "fsl,erratum-a008585",
442 		.desc = "Freescale erratum a005858",
443 		.read_cntpct_el0 = fsl_a008585_read_cntpct_el0,
444 		.read_cntvct_el0 = fsl_a008585_read_cntvct_el0,
445 		.set_next_event_phys = erratum_set_next_event_phys,
446 		.set_next_event_virt = erratum_set_next_event_virt,
447 	},
448 #endif
449 #ifdef CONFIG_HISILICON_ERRATUM_161010101
450 	{
451 		.match_type = ate_match_dt,
452 		.id = "hisilicon,erratum-161010101",
453 		.desc = "HiSilicon erratum 161010101",
454 		.read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
455 		.read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
456 		.set_next_event_phys = erratum_set_next_event_phys,
457 		.set_next_event_virt = erratum_set_next_event_virt,
458 	},
459 	{
460 		.match_type = ate_match_acpi_oem_info,
461 		.id = hisi_161010101_oem_info,
462 		.desc = "HiSilicon erratum 161010101",
463 		.read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
464 		.read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
465 		.set_next_event_phys = erratum_set_next_event_phys,
466 		.set_next_event_virt = erratum_set_next_event_virt,
467 	},
468 #endif
469 #ifdef CONFIG_ARM64_ERRATUM_858921
470 	{
471 		.match_type = ate_match_local_cap_id,
472 		.id = (void *)ARM64_WORKAROUND_858921,
473 		.desc = "ARM erratum 858921",
474 		.read_cntpct_el0 = arm64_858921_read_cntpct_el0,
475 		.read_cntvct_el0 = arm64_858921_read_cntvct_el0,
476 		.set_next_event_phys = erratum_set_next_event_phys,
477 		.set_next_event_virt = erratum_set_next_event_virt,
478 	},
479 #endif
480 #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
481 	{
482 		.match_type = ate_match_dt,
483 		.id = "allwinner,erratum-unknown1",
484 		.desc = "Allwinner erratum UNKNOWN1",
485 		.read_cntpct_el0 = sun50i_a64_read_cntpct_el0,
486 		.read_cntvct_el0 = sun50i_a64_read_cntvct_el0,
487 		.set_next_event_phys = erratum_set_next_event_phys,
488 		.set_next_event_virt = erratum_set_next_event_virt,
489 	},
490 #endif
491 #ifdef CONFIG_ARM64_ERRATUM_1418040
492 	{
493 		.match_type = ate_match_local_cap_id,
494 		.id = (void *)ARM64_WORKAROUND_1418040,
495 		.desc = "ARM erratum 1418040",
496 		.disable_compat_vdso = true,
497 	},
498 #endif
499 };
500 
501 typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *,
502 			       const void *);
503 
504 static
505 bool arch_timer_check_dt_erratum(const struct arch_timer_erratum_workaround *wa,
506 				 const void *arg)
507 {
508 	const struct device_node *np = arg;
509 
510 	return of_property_read_bool(np, wa->id);
511 }
512 
513 static
514 bool arch_timer_check_local_cap_erratum(const struct arch_timer_erratum_workaround *wa,
515 					const void *arg)
516 {
517 	return this_cpu_has_cap((uintptr_t)wa->id);
518 }
519 
520 
521 static
522 bool arch_timer_check_acpi_oem_erratum(const struct arch_timer_erratum_workaround *wa,
523 				       const void *arg)
524 {
525 	static const struct ate_acpi_oem_info empty_oem_info = {};
526 	const struct ate_acpi_oem_info *info = wa->id;
527 	const struct acpi_table_header *table = arg;
528 
529 	/* Iterate over the ACPI OEM info array, looking for a match */
530 	while (memcmp(info, &empty_oem_info, sizeof(*info))) {
531 		if (!memcmp(info->oem_id, table->oem_id, ACPI_OEM_ID_SIZE) &&
532 		    !memcmp(info->oem_table_id, table->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) &&
533 		    info->oem_revision == table->oem_revision)
534 			return true;
535 
536 		info++;
537 	}
538 
539 	return false;
540 }
541 
542 static const struct arch_timer_erratum_workaround *
543 arch_timer_iterate_errata(enum arch_timer_erratum_match_type type,
544 			  ate_match_fn_t match_fn,
545 			  void *arg)
546 {
547 	int i;
548 
549 	for (i = 0; i < ARRAY_SIZE(ool_workarounds); i++) {
550 		if (ool_workarounds[i].match_type != type)
551 			continue;
552 
553 		if (match_fn(&ool_workarounds[i], arg))
554 			return &ool_workarounds[i];
555 	}
556 
557 	return NULL;
558 }
559 
560 static
561 void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa,
562 				  bool local)
563 {
564 	int i;
565 
566 	if (local) {
567 		__this_cpu_write(timer_unstable_counter_workaround, wa);
568 	} else {
569 		for_each_possible_cpu(i)
570 			per_cpu(timer_unstable_counter_workaround, i) = wa;
571 	}
572 
573 	if (wa->read_cntvct_el0 || wa->read_cntpct_el0)
574 		atomic_set(&timer_unstable_counter_workaround_in_use, 1);
575 
576 	/*
577 	 * Don't use the vdso fastpath if errata require using the
578 	 * out-of-line counter accessor. We may change our mind pretty
579 	 * late in the game (with a per-CPU erratum, for example), so
580 	 * change both the default value and the vdso itself.
581 	 */
582 	if (wa->read_cntvct_el0) {
583 		clocksource_counter.vdso_clock_mode = VDSO_CLOCKMODE_NONE;
584 		vdso_default = VDSO_CLOCKMODE_NONE;
585 	} else if (wa->disable_compat_vdso && vdso_default != VDSO_CLOCKMODE_NONE) {
586 		vdso_default = VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT;
587 		clocksource_counter.vdso_clock_mode = vdso_default;
588 	}
589 }
590 
591 static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type type,
592 					    void *arg)
593 {
594 	const struct arch_timer_erratum_workaround *wa, *__wa;
595 	ate_match_fn_t match_fn = NULL;
596 	bool local = false;
597 
598 	switch (type) {
599 	case ate_match_dt:
600 		match_fn = arch_timer_check_dt_erratum;
601 		break;
602 	case ate_match_local_cap_id:
603 		match_fn = arch_timer_check_local_cap_erratum;
604 		local = true;
605 		break;
606 	case ate_match_acpi_oem_info:
607 		match_fn = arch_timer_check_acpi_oem_erratum;
608 		break;
609 	default:
610 		WARN_ON(1);
611 		return;
612 	}
613 
614 	wa = arch_timer_iterate_errata(type, match_fn, arg);
615 	if (!wa)
616 		return;
617 
618 	__wa = __this_cpu_read(timer_unstable_counter_workaround);
619 	if (__wa && wa != __wa)
620 		pr_warn("Can't enable workaround for %s (clashes with %s\n)",
621 			wa->desc, __wa->desc);
622 
623 	if (__wa)
624 		return;
625 
626 	arch_timer_enable_workaround(wa, local);
627 	pr_info("Enabling %s workaround for %s\n",
628 		local ? "local" : "global", wa->desc);
629 }
630 
631 static bool arch_timer_this_cpu_has_cntvct_wa(void)
632 {
633 	return has_erratum_handler(read_cntvct_el0);
634 }
635 
636 static bool arch_timer_counter_has_wa(void)
637 {
638 	return atomic_read(&timer_unstable_counter_workaround_in_use);
639 }
640 #else
641 #define arch_timer_check_ool_workaround(t,a)		do { } while(0)
642 #define arch_timer_this_cpu_has_cntvct_wa()		({false;})
643 #define arch_timer_counter_has_wa()			({false;})
644 #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */
645 
646 static __always_inline irqreturn_t timer_handler(const int access,
647 					struct clock_event_device *evt)
648 {
649 	unsigned long ctrl;
650 
651 	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, evt);
652 	if (ctrl & ARCH_TIMER_CTRL_IT_STAT) {
653 		ctrl |= ARCH_TIMER_CTRL_IT_MASK;
654 		arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt);
655 		evt->event_handler(evt);
656 		return IRQ_HANDLED;
657 	}
658 
659 	return IRQ_NONE;
660 }
661 
662 static irqreturn_t arch_timer_handler_virt(int irq, void *dev_id)
663 {
664 	struct clock_event_device *evt = dev_id;
665 
666 	return timer_handler(ARCH_TIMER_VIRT_ACCESS, evt);
667 }
668 
669 static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id)
670 {
671 	struct clock_event_device *evt = dev_id;
672 
673 	return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt);
674 }
675 
676 static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id)
677 {
678 	struct clock_event_device *evt = dev_id;
679 
680 	return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt);
681 }
682 
683 static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id)
684 {
685 	struct clock_event_device *evt = dev_id;
686 
687 	return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt);
688 }
689 
690 static __always_inline int timer_shutdown(const int access,
691 					  struct clock_event_device *clk)
692 {
693 	unsigned long ctrl;
694 
695 	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
696 	ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
697 	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
698 
699 	return 0;
700 }
701 
702 static int arch_timer_shutdown_virt(struct clock_event_device *clk)
703 {
704 	return timer_shutdown(ARCH_TIMER_VIRT_ACCESS, clk);
705 }
706 
707 static int arch_timer_shutdown_phys(struct clock_event_device *clk)
708 {
709 	return timer_shutdown(ARCH_TIMER_PHYS_ACCESS, clk);
710 }
711 
712 static int arch_timer_shutdown_virt_mem(struct clock_event_device *clk)
713 {
714 	return timer_shutdown(ARCH_TIMER_MEM_VIRT_ACCESS, clk);
715 }
716 
717 static int arch_timer_shutdown_phys_mem(struct clock_event_device *clk)
718 {
719 	return timer_shutdown(ARCH_TIMER_MEM_PHYS_ACCESS, clk);
720 }
721 
722 static __always_inline void set_next_event(const int access, unsigned long evt,
723 					   struct clock_event_device *clk)
724 {
725 	unsigned long ctrl;
726 	u64 cnt;
727 
728 	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
729 	ctrl |= ARCH_TIMER_CTRL_ENABLE;
730 	ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
731 
732 	if (access == ARCH_TIMER_PHYS_ACCESS)
733 		cnt = __arch_counter_get_cntpct();
734 	else
735 		cnt = __arch_counter_get_cntvct();
736 
737 	arch_timer_reg_write(access, ARCH_TIMER_REG_CVAL, evt + cnt, clk);
738 	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
739 }
740 
741 static int arch_timer_set_next_event_virt(unsigned long evt,
742 					  struct clock_event_device *clk)
743 {
744 	set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk);
745 	return 0;
746 }
747 
748 static int arch_timer_set_next_event_phys(unsigned long evt,
749 					  struct clock_event_device *clk)
750 {
751 	set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk);
752 	return 0;
753 }
754 
755 static u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
756 {
757 	u32 cnt_lo, cnt_hi, tmp_hi;
758 
759 	do {
760 		cnt_hi = readl_relaxed(t->base + offset_lo + 4);
761 		cnt_lo = readl_relaxed(t->base + offset_lo);
762 		tmp_hi = readl_relaxed(t->base + offset_lo + 4);
763 	} while (cnt_hi != tmp_hi);
764 
765 	return ((u64) cnt_hi << 32) | cnt_lo;
766 }
767 
768 static __always_inline void set_next_event_mem(const int access, unsigned long evt,
769 					   struct clock_event_device *clk)
770 {
771 	struct arch_timer *timer = to_arch_timer(clk);
772 	unsigned long ctrl;
773 	u64 cnt;
774 
775 	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
776 	ctrl |= ARCH_TIMER_CTRL_ENABLE;
777 	ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
778 
779 	if (access ==  ARCH_TIMER_MEM_VIRT_ACCESS)
780 		cnt = arch_counter_get_cnt_mem(timer, CNTVCT_LO);
781 	else
782 		cnt = arch_counter_get_cnt_mem(timer, CNTPCT_LO);
783 
784 	arch_timer_reg_write(access, ARCH_TIMER_REG_CVAL, evt + cnt, clk);
785 	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
786 }
787 
788 static int arch_timer_set_next_event_virt_mem(unsigned long evt,
789 					      struct clock_event_device *clk)
790 {
791 	set_next_event_mem(ARCH_TIMER_MEM_VIRT_ACCESS, evt, clk);
792 	return 0;
793 }
794 
795 static int arch_timer_set_next_event_phys_mem(unsigned long evt,
796 					      struct clock_event_device *clk)
797 {
798 	set_next_event_mem(ARCH_TIMER_MEM_PHYS_ACCESS, evt, clk);
799 	return 0;
800 }
801 
802 static u64 __arch_timer_check_delta(void)
803 {
804 #ifdef CONFIG_ARM64
805 	const struct midr_range broken_cval_midrs[] = {
806 		/*
807 		 * XGene-1 implements CVAL in terms of TVAL, meaning
808 		 * that the maximum timer range is 32bit. Shame on them.
809 		 *
810 		 * Note that TVAL is signed, thus has only 31 of its
811 		 * 32 bits to express magnitude.
812 		 */
813 		MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM,
814 						 APM_CPU_PART_POTENZA)),
815 		{},
816 	};
817 
818 	if (is_midr_in_range_list(read_cpuid_id(), broken_cval_midrs)) {
819 		pr_warn_once("Broken CNTx_CVAL_EL1, using 31 bit TVAL instead.\n");
820 		return CLOCKSOURCE_MASK(31);
821 	}
822 #endif
823 	return CLOCKSOURCE_MASK(arch_counter_get_width());
824 }
825 
826 static void __arch_timer_setup(unsigned type,
827 			       struct clock_event_device *clk)
828 {
829 	u64 max_delta;
830 
831 	clk->features = CLOCK_EVT_FEAT_ONESHOT;
832 
833 	if (type == ARCH_TIMER_TYPE_CP15) {
834 		typeof(clk->set_next_event) sne;
835 
836 		arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL);
837 
838 		if (arch_timer_c3stop)
839 			clk->features |= CLOCK_EVT_FEAT_C3STOP;
840 		clk->name = "arch_sys_timer";
841 		clk->rating = 450;
842 		clk->cpumask = cpumask_of(smp_processor_id());
843 		clk->irq = arch_timer_ppi[arch_timer_uses_ppi];
844 		switch (arch_timer_uses_ppi) {
845 		case ARCH_TIMER_VIRT_PPI:
846 			clk->set_state_shutdown = arch_timer_shutdown_virt;
847 			clk->set_state_oneshot_stopped = arch_timer_shutdown_virt;
848 			sne = erratum_handler(set_next_event_virt);
849 			break;
850 		case ARCH_TIMER_PHYS_SECURE_PPI:
851 		case ARCH_TIMER_PHYS_NONSECURE_PPI:
852 		case ARCH_TIMER_HYP_PPI:
853 			clk->set_state_shutdown = arch_timer_shutdown_phys;
854 			clk->set_state_oneshot_stopped = arch_timer_shutdown_phys;
855 			sne = erratum_handler(set_next_event_phys);
856 			break;
857 		default:
858 			BUG();
859 		}
860 
861 		clk->set_next_event = sne;
862 		max_delta = __arch_timer_check_delta();
863 	} else {
864 		clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
865 		clk->name = "arch_mem_timer";
866 		clk->rating = 400;
867 		clk->cpumask = cpu_possible_mask;
868 		if (arch_timer_mem_use_virtual) {
869 			clk->set_state_shutdown = arch_timer_shutdown_virt_mem;
870 			clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem;
871 			clk->set_next_event =
872 				arch_timer_set_next_event_virt_mem;
873 		} else {
874 			clk->set_state_shutdown = arch_timer_shutdown_phys_mem;
875 			clk->set_state_oneshot_stopped = arch_timer_shutdown_phys_mem;
876 			clk->set_next_event =
877 				arch_timer_set_next_event_phys_mem;
878 		}
879 
880 		max_delta = CLOCKSOURCE_MASK(56);
881 	}
882 
883 	clk->set_state_shutdown(clk);
884 
885 	clockevents_config_and_register(clk, arch_timer_rate, 0xf, max_delta);
886 }
887 
888 static void arch_timer_evtstrm_enable(unsigned int divider)
889 {
890 	u32 cntkctl = arch_timer_get_cntkctl();
891 
892 #ifdef CONFIG_ARM64
893 	/* ECV is likely to require a large divider. Use the EVNTIS flag. */
894 	if (cpus_have_const_cap(ARM64_HAS_ECV) && divider > 15) {
895 		cntkctl |= ARCH_TIMER_EVT_INTERVAL_SCALE;
896 		divider -= 8;
897 	}
898 #endif
899 
900 	divider = min(divider, 15U);
901 	cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
902 	/* Set the divider and enable virtual event stream */
903 	cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
904 			| ARCH_TIMER_VIRT_EVT_EN;
905 	arch_timer_set_cntkctl(cntkctl);
906 	arch_timer_set_evtstrm_feature();
907 	cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
908 }
909 
910 static void arch_timer_configure_evtstream(void)
911 {
912 	int evt_stream_div, lsb;
913 
914 	/*
915 	 * As the event stream can at most be generated at half the frequency
916 	 * of the counter, use half the frequency when computing the divider.
917 	 */
918 	evt_stream_div = arch_timer_rate / ARCH_TIMER_EVT_STREAM_FREQ / 2;
919 
920 	/*
921 	 * Find the closest power of two to the divisor. If the adjacent bit
922 	 * of lsb (last set bit, starts from 0) is set, then we use (lsb + 1).
923 	 */
924 	lsb = fls(evt_stream_div) - 1;
925 	if (lsb > 0 && (evt_stream_div & BIT(lsb - 1)))
926 		lsb++;
927 
928 	/* enable event stream */
929 	arch_timer_evtstrm_enable(max(0, lsb));
930 }
931 
932 static void arch_counter_set_user_access(void)
933 {
934 	u32 cntkctl = arch_timer_get_cntkctl();
935 
936 	/* Disable user access to the timers and both counters */
937 	/* Also disable virtual event stream */
938 	cntkctl &= ~(ARCH_TIMER_USR_PT_ACCESS_EN
939 			| ARCH_TIMER_USR_VT_ACCESS_EN
940 		        | ARCH_TIMER_USR_VCT_ACCESS_EN
941 			| ARCH_TIMER_VIRT_EVT_EN
942 			| ARCH_TIMER_USR_PCT_ACCESS_EN);
943 
944 	/*
945 	 * Enable user access to the virtual counter if it doesn't
946 	 * need to be workaround. The vdso may have been already
947 	 * disabled though.
948 	 */
949 	if (arch_timer_this_cpu_has_cntvct_wa())
950 		pr_info("CPU%d: Trapping CNTVCT access\n", smp_processor_id());
951 	else
952 		cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN;
953 
954 	arch_timer_set_cntkctl(cntkctl);
955 }
956 
957 static bool arch_timer_has_nonsecure_ppi(void)
958 {
959 	return (arch_timer_uses_ppi == ARCH_TIMER_PHYS_SECURE_PPI &&
960 		arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]);
961 }
962 
963 static u32 check_ppi_trigger(int irq)
964 {
965 	u32 flags = irq_get_trigger_type(irq);
966 
967 	if (flags != IRQF_TRIGGER_HIGH && flags != IRQF_TRIGGER_LOW) {
968 		pr_warn("WARNING: Invalid trigger for IRQ%d, assuming level low\n", irq);
969 		pr_warn("WARNING: Please fix your firmware\n");
970 		flags = IRQF_TRIGGER_LOW;
971 	}
972 
973 	return flags;
974 }
975 
976 static int arch_timer_starting_cpu(unsigned int cpu)
977 {
978 	struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
979 	u32 flags;
980 
981 	__arch_timer_setup(ARCH_TIMER_TYPE_CP15, clk);
982 
983 	flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]);
984 	enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags);
985 
986 	if (arch_timer_has_nonsecure_ppi()) {
987 		flags = check_ppi_trigger(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]);
988 		enable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI],
989 				  flags);
990 	}
991 
992 	arch_counter_set_user_access();
993 	if (evtstrm_enable)
994 		arch_timer_configure_evtstream();
995 
996 	return 0;
997 }
998 
999 static int validate_timer_rate(void)
1000 {
1001 	if (!arch_timer_rate)
1002 		return -EINVAL;
1003 
1004 	/* Arch timer frequency < 1MHz can cause trouble */
1005 	WARN_ON(arch_timer_rate < 1000000);
1006 
1007 	return 0;
1008 }
1009 
1010 /*
1011  * For historical reasons, when probing with DT we use whichever (non-zero)
1012  * rate was probed first, and don't verify that others match. If the first node
1013  * probed has a clock-frequency property, this overrides the HW register.
1014  */
1015 static void __init arch_timer_of_configure_rate(u32 rate, struct device_node *np)
1016 {
1017 	/* Who has more than one independent system counter? */
1018 	if (arch_timer_rate)
1019 		return;
1020 
1021 	if (of_property_read_u32(np, "clock-frequency", &arch_timer_rate))
1022 		arch_timer_rate = rate;
1023 
1024 	/* Check the timer frequency. */
1025 	if (validate_timer_rate())
1026 		pr_warn("frequency not available\n");
1027 }
1028 
1029 static void __init arch_timer_banner(unsigned type)
1030 {
1031 	pr_info("%s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n",
1032 		type & ARCH_TIMER_TYPE_CP15 ? "cp15" : "",
1033 		type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ?
1034 			" and " : "",
1035 		type & ARCH_TIMER_TYPE_MEM ? "mmio" : "",
1036 		(unsigned long)arch_timer_rate / 1000000,
1037 		(unsigned long)(arch_timer_rate / 10000) % 100,
1038 		type & ARCH_TIMER_TYPE_CP15 ?
1039 			(arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ? "virt" : "phys" :
1040 			"",
1041 		type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ? "/" : "",
1042 		type & ARCH_TIMER_TYPE_MEM ?
1043 			arch_timer_mem_use_virtual ? "virt" : "phys" :
1044 			"");
1045 }
1046 
1047 u32 arch_timer_get_rate(void)
1048 {
1049 	return arch_timer_rate;
1050 }
1051 
1052 bool arch_timer_evtstrm_available(void)
1053 {
1054 	/*
1055 	 * We might get called from a preemptible context. This is fine
1056 	 * because availability of the event stream should be always the same
1057 	 * for a preemptible context and context where we might resume a task.
1058 	 */
1059 	return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
1060 }
1061 
1062 static u64 arch_counter_get_cntvct_mem(void)
1063 {
1064 	return arch_counter_get_cnt_mem(arch_timer_mem, CNTVCT_LO);
1065 }
1066 
1067 static struct arch_timer_kvm_info arch_timer_kvm_info;
1068 
1069 struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
1070 {
1071 	return &arch_timer_kvm_info;
1072 }
1073 
1074 static void __init arch_counter_register(unsigned type)
1075 {
1076 	u64 start_count;
1077 	int width;
1078 
1079 	/* Register the CP15 based counter if we have one */
1080 	if (type & ARCH_TIMER_TYPE_CP15) {
1081 		u64 (*rd)(void);
1082 
1083 		if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
1084 		    arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
1085 			if (arch_timer_counter_has_wa())
1086 				rd = arch_counter_get_cntvct_stable;
1087 			else
1088 				rd = arch_counter_get_cntvct;
1089 		} else {
1090 			if (arch_timer_counter_has_wa())
1091 				rd = arch_counter_get_cntpct_stable;
1092 			else
1093 				rd = arch_counter_get_cntpct;
1094 		}
1095 
1096 		arch_timer_read_counter = rd;
1097 		clocksource_counter.vdso_clock_mode = vdso_default;
1098 	} else {
1099 		arch_timer_read_counter = arch_counter_get_cntvct_mem;
1100 	}
1101 
1102 	width = arch_counter_get_width();
1103 	clocksource_counter.mask = CLOCKSOURCE_MASK(width);
1104 	cyclecounter.mask = CLOCKSOURCE_MASK(width);
1105 
1106 	if (!arch_counter_suspend_stop)
1107 		clocksource_counter.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
1108 	start_count = arch_timer_read_counter();
1109 	clocksource_register_hz(&clocksource_counter, arch_timer_rate);
1110 	cyclecounter.mult = clocksource_counter.mult;
1111 	cyclecounter.shift = clocksource_counter.shift;
1112 	timecounter_init(&arch_timer_kvm_info.timecounter,
1113 			 &cyclecounter, start_count);
1114 
1115 	sched_clock_register(arch_timer_read_counter, width, arch_timer_rate);
1116 }
1117 
1118 static void arch_timer_stop(struct clock_event_device *clk)
1119 {
1120 	pr_debug("disable IRQ%d cpu #%d\n", clk->irq, smp_processor_id());
1121 
1122 	disable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi]);
1123 	if (arch_timer_has_nonsecure_ppi())
1124 		disable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]);
1125 
1126 	clk->set_state_shutdown(clk);
1127 }
1128 
1129 static int arch_timer_dying_cpu(unsigned int cpu)
1130 {
1131 	struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
1132 
1133 	cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
1134 
1135 	arch_timer_stop(clk);
1136 	return 0;
1137 }
1138 
1139 #ifdef CONFIG_CPU_PM
1140 static DEFINE_PER_CPU(unsigned long, saved_cntkctl);
1141 static int arch_timer_cpu_pm_notify(struct notifier_block *self,
1142 				    unsigned long action, void *hcpu)
1143 {
1144 	if (action == CPU_PM_ENTER) {
1145 		__this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl());
1146 
1147 		cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
1148 	} else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
1149 		arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
1150 
1151 		if (arch_timer_have_evtstrm_feature())
1152 			cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
1153 	}
1154 	return NOTIFY_OK;
1155 }
1156 
1157 static struct notifier_block arch_timer_cpu_pm_notifier = {
1158 	.notifier_call = arch_timer_cpu_pm_notify,
1159 };
1160 
1161 static int __init arch_timer_cpu_pm_init(void)
1162 {
1163 	return cpu_pm_register_notifier(&arch_timer_cpu_pm_notifier);
1164 }
1165 
1166 static void __init arch_timer_cpu_pm_deinit(void)
1167 {
1168 	WARN_ON(cpu_pm_unregister_notifier(&arch_timer_cpu_pm_notifier));
1169 }
1170 
1171 #else
1172 static int __init arch_timer_cpu_pm_init(void)
1173 {
1174 	return 0;
1175 }
1176 
1177 static void __init arch_timer_cpu_pm_deinit(void)
1178 {
1179 }
1180 #endif
1181 
1182 static int __init arch_timer_register(void)
1183 {
1184 	int err;
1185 	int ppi;
1186 
1187 	arch_timer_evt = alloc_percpu(struct clock_event_device);
1188 	if (!arch_timer_evt) {
1189 		err = -ENOMEM;
1190 		goto out;
1191 	}
1192 
1193 	ppi = arch_timer_ppi[arch_timer_uses_ppi];
1194 	switch (arch_timer_uses_ppi) {
1195 	case ARCH_TIMER_VIRT_PPI:
1196 		err = request_percpu_irq(ppi, arch_timer_handler_virt,
1197 					 "arch_timer", arch_timer_evt);
1198 		break;
1199 	case ARCH_TIMER_PHYS_SECURE_PPI:
1200 	case ARCH_TIMER_PHYS_NONSECURE_PPI:
1201 		err = request_percpu_irq(ppi, arch_timer_handler_phys,
1202 					 "arch_timer", arch_timer_evt);
1203 		if (!err && arch_timer_has_nonsecure_ppi()) {
1204 			ppi = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI];
1205 			err = request_percpu_irq(ppi, arch_timer_handler_phys,
1206 						 "arch_timer", arch_timer_evt);
1207 			if (err)
1208 				free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_SECURE_PPI],
1209 						arch_timer_evt);
1210 		}
1211 		break;
1212 	case ARCH_TIMER_HYP_PPI:
1213 		err = request_percpu_irq(ppi, arch_timer_handler_phys,
1214 					 "arch_timer", arch_timer_evt);
1215 		break;
1216 	default:
1217 		BUG();
1218 	}
1219 
1220 	if (err) {
1221 		pr_err("can't register interrupt %d (%d)\n", ppi, err);
1222 		goto out_free;
1223 	}
1224 
1225 	err = arch_timer_cpu_pm_init();
1226 	if (err)
1227 		goto out_unreg_notify;
1228 
1229 	/* Register and immediately configure the timer on the boot CPU */
1230 	err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING,
1231 				"clockevents/arm/arch_timer:starting",
1232 				arch_timer_starting_cpu, arch_timer_dying_cpu);
1233 	if (err)
1234 		goto out_unreg_cpupm;
1235 	return 0;
1236 
1237 out_unreg_cpupm:
1238 	arch_timer_cpu_pm_deinit();
1239 
1240 out_unreg_notify:
1241 	free_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], arch_timer_evt);
1242 	if (arch_timer_has_nonsecure_ppi())
1243 		free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI],
1244 				arch_timer_evt);
1245 
1246 out_free:
1247 	free_percpu(arch_timer_evt);
1248 out:
1249 	return err;
1250 }
1251 
1252 static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq)
1253 {
1254 	int ret;
1255 	irq_handler_t func;
1256 
1257 	arch_timer_mem = kzalloc(sizeof(*arch_timer_mem), GFP_KERNEL);
1258 	if (!arch_timer_mem)
1259 		return -ENOMEM;
1260 
1261 	arch_timer_mem->base = base;
1262 	arch_timer_mem->evt.irq = irq;
1263 	__arch_timer_setup(ARCH_TIMER_TYPE_MEM, &arch_timer_mem->evt);
1264 
1265 	if (arch_timer_mem_use_virtual)
1266 		func = arch_timer_handler_virt_mem;
1267 	else
1268 		func = arch_timer_handler_phys_mem;
1269 
1270 	ret = request_irq(irq, func, IRQF_TIMER, "arch_mem_timer", &arch_timer_mem->evt);
1271 	if (ret) {
1272 		pr_err("Failed to request mem timer irq\n");
1273 		kfree(arch_timer_mem);
1274 		arch_timer_mem = NULL;
1275 	}
1276 
1277 	return ret;
1278 }
1279 
1280 static const struct of_device_id arch_timer_of_match[] __initconst = {
1281 	{ .compatible   = "arm,armv7-timer",    },
1282 	{ .compatible   = "arm,armv8-timer",    },
1283 	{},
1284 };
1285 
1286 static const struct of_device_id arch_timer_mem_of_match[] __initconst = {
1287 	{ .compatible   = "arm,armv7-timer-mem", },
1288 	{},
1289 };
1290 
1291 static bool __init arch_timer_needs_of_probing(void)
1292 {
1293 	struct device_node *dn;
1294 	bool needs_probing = false;
1295 	unsigned int mask = ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM;
1296 
1297 	/* We have two timers, and both device-tree nodes are probed. */
1298 	if ((arch_timers_present & mask) == mask)
1299 		return false;
1300 
1301 	/*
1302 	 * Only one type of timer is probed,
1303 	 * check if we have another type of timer node in device-tree.
1304 	 */
1305 	if (arch_timers_present & ARCH_TIMER_TYPE_CP15)
1306 		dn = of_find_matching_node(NULL, arch_timer_mem_of_match);
1307 	else
1308 		dn = of_find_matching_node(NULL, arch_timer_of_match);
1309 
1310 	if (dn && of_device_is_available(dn))
1311 		needs_probing = true;
1312 
1313 	of_node_put(dn);
1314 
1315 	return needs_probing;
1316 }
1317 
1318 static int __init arch_timer_common_init(void)
1319 {
1320 	arch_timer_banner(arch_timers_present);
1321 	arch_counter_register(arch_timers_present);
1322 	return arch_timer_arch_init();
1323 }
1324 
1325 /**
1326  * arch_timer_select_ppi() - Select suitable PPI for the current system.
1327  *
1328  * If HYP mode is available, we know that the physical timer
1329  * has been configured to be accessible from PL1. Use it, so
1330  * that a guest can use the virtual timer instead.
1331  *
1332  * On ARMv8.1 with VH extensions, the kernel runs in HYP. VHE
1333  * accesses to CNTP_*_EL1 registers are silently redirected to
1334  * their CNTHP_*_EL2 counterparts, and use a different PPI
1335  * number.
1336  *
1337  * If no interrupt provided for virtual timer, we'll have to
1338  * stick to the physical timer. It'd better be accessible...
1339  * For arm64 we never use the secure interrupt.
1340  *
1341  * Return: a suitable PPI type for the current system.
1342  */
1343 static enum arch_timer_ppi_nr __init arch_timer_select_ppi(void)
1344 {
1345 	if (is_kernel_in_hyp_mode())
1346 		return ARCH_TIMER_HYP_PPI;
1347 
1348 	if (!is_hyp_mode_available() && arch_timer_ppi[ARCH_TIMER_VIRT_PPI])
1349 		return ARCH_TIMER_VIRT_PPI;
1350 
1351 	if (IS_ENABLED(CONFIG_ARM64))
1352 		return ARCH_TIMER_PHYS_NONSECURE_PPI;
1353 
1354 	return ARCH_TIMER_PHYS_SECURE_PPI;
1355 }
1356 
1357 static void __init arch_timer_populate_kvm_info(void)
1358 {
1359 	arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI];
1360 	if (is_kernel_in_hyp_mode())
1361 		arch_timer_kvm_info.physical_irq = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI];
1362 }
1363 
1364 static int __init arch_timer_of_init(struct device_node *np)
1365 {
1366 	int i, irq, ret;
1367 	u32 rate;
1368 	bool has_names;
1369 
1370 	if (arch_timers_present & ARCH_TIMER_TYPE_CP15) {
1371 		pr_warn("multiple nodes in dt, skipping\n");
1372 		return 0;
1373 	}
1374 
1375 	arch_timers_present |= ARCH_TIMER_TYPE_CP15;
1376 
1377 	has_names = of_property_read_bool(np, "interrupt-names");
1378 
1379 	for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) {
1380 		if (has_names)
1381 			irq = of_irq_get_byname(np, arch_timer_ppi_names[i]);
1382 		else
1383 			irq = of_irq_get(np, i);
1384 		if (irq > 0)
1385 			arch_timer_ppi[i] = irq;
1386 	}
1387 
1388 	arch_timer_populate_kvm_info();
1389 
1390 	rate = arch_timer_get_cntfrq();
1391 	arch_timer_of_configure_rate(rate, np);
1392 
1393 	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
1394 
1395 	/* Check for globally applicable workarounds */
1396 	arch_timer_check_ool_workaround(ate_match_dt, np);
1397 
1398 	/*
1399 	 * If we cannot rely on firmware initializing the timer registers then
1400 	 * we should use the physical timers instead.
1401 	 */
1402 	if (IS_ENABLED(CONFIG_ARM) &&
1403 	    of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
1404 		arch_timer_uses_ppi = ARCH_TIMER_PHYS_SECURE_PPI;
1405 	else
1406 		arch_timer_uses_ppi = arch_timer_select_ppi();
1407 
1408 	if (!arch_timer_ppi[arch_timer_uses_ppi]) {
1409 		pr_err("No interrupt available, giving up\n");
1410 		return -EINVAL;
1411 	}
1412 
1413 	/* On some systems, the counter stops ticking when in suspend. */
1414 	arch_counter_suspend_stop = of_property_read_bool(np,
1415 							 "arm,no-tick-in-suspend");
1416 
1417 	ret = arch_timer_register();
1418 	if (ret)
1419 		return ret;
1420 
1421 	if (arch_timer_needs_of_probing())
1422 		return 0;
1423 
1424 	return arch_timer_common_init();
1425 }
1426 TIMER_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init);
1427 TIMER_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init);
1428 
1429 static u32 __init
1430 arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame)
1431 {
1432 	void __iomem *base;
1433 	u32 rate;
1434 
1435 	base = ioremap(frame->cntbase, frame->size);
1436 	if (!base) {
1437 		pr_err("Unable to map frame @ %pa\n", &frame->cntbase);
1438 		return 0;
1439 	}
1440 
1441 	rate = readl_relaxed(base + CNTFRQ);
1442 
1443 	iounmap(base);
1444 
1445 	return rate;
1446 }
1447 
1448 static struct arch_timer_mem_frame * __init
1449 arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem)
1450 {
1451 	struct arch_timer_mem_frame *frame, *best_frame = NULL;
1452 	void __iomem *cntctlbase;
1453 	u32 cnttidr;
1454 	int i;
1455 
1456 	cntctlbase = ioremap(timer_mem->cntctlbase, timer_mem->size);
1457 	if (!cntctlbase) {
1458 		pr_err("Can't map CNTCTLBase @ %pa\n",
1459 			&timer_mem->cntctlbase);
1460 		return NULL;
1461 	}
1462 
1463 	cnttidr = readl_relaxed(cntctlbase + CNTTIDR);
1464 
1465 	/*
1466 	 * Try to find a virtual capable frame. Otherwise fall back to a
1467 	 * physical capable frame.
1468 	 */
1469 	for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) {
1470 		u32 cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT |
1471 			     CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT;
1472 
1473 		frame = &timer_mem->frame[i];
1474 		if (!frame->valid)
1475 			continue;
1476 
1477 		/* Try enabling everything, and see what sticks */
1478 		writel_relaxed(cntacr, cntctlbase + CNTACR(i));
1479 		cntacr = readl_relaxed(cntctlbase + CNTACR(i));
1480 
1481 		if ((cnttidr & CNTTIDR_VIRT(i)) &&
1482 		    !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT))) {
1483 			best_frame = frame;
1484 			arch_timer_mem_use_virtual = true;
1485 			break;
1486 		}
1487 
1488 		if (~cntacr & (CNTACR_RWPT | CNTACR_RPCT))
1489 			continue;
1490 
1491 		best_frame = frame;
1492 	}
1493 
1494 	iounmap(cntctlbase);
1495 
1496 	return best_frame;
1497 }
1498 
1499 static int __init
1500 arch_timer_mem_frame_register(struct arch_timer_mem_frame *frame)
1501 {
1502 	void __iomem *base;
1503 	int ret, irq = 0;
1504 
1505 	if (arch_timer_mem_use_virtual)
1506 		irq = frame->virt_irq;
1507 	else
1508 		irq = frame->phys_irq;
1509 
1510 	if (!irq) {
1511 		pr_err("Frame missing %s irq.\n",
1512 		       arch_timer_mem_use_virtual ? "virt" : "phys");
1513 		return -EINVAL;
1514 	}
1515 
1516 	if (!request_mem_region(frame->cntbase, frame->size,
1517 				"arch_mem_timer"))
1518 		return -EBUSY;
1519 
1520 	base = ioremap(frame->cntbase, frame->size);
1521 	if (!base) {
1522 		pr_err("Can't map frame's registers\n");
1523 		return -ENXIO;
1524 	}
1525 
1526 	ret = arch_timer_mem_register(base, irq);
1527 	if (ret) {
1528 		iounmap(base);
1529 		return ret;
1530 	}
1531 
1532 	arch_timers_present |= ARCH_TIMER_TYPE_MEM;
1533 
1534 	return 0;
1535 }
1536 
1537 static int __init arch_timer_mem_of_init(struct device_node *np)
1538 {
1539 	struct arch_timer_mem *timer_mem;
1540 	struct arch_timer_mem_frame *frame;
1541 	struct device_node *frame_node;
1542 	struct resource res;
1543 	int ret = -EINVAL;
1544 	u32 rate;
1545 
1546 	timer_mem = kzalloc(sizeof(*timer_mem), GFP_KERNEL);
1547 	if (!timer_mem)
1548 		return -ENOMEM;
1549 
1550 	if (of_address_to_resource(np, 0, &res))
1551 		goto out;
1552 	timer_mem->cntctlbase = res.start;
1553 	timer_mem->size = resource_size(&res);
1554 
1555 	for_each_available_child_of_node(np, frame_node) {
1556 		u32 n;
1557 		struct arch_timer_mem_frame *frame;
1558 
1559 		if (of_property_read_u32(frame_node, "frame-number", &n)) {
1560 			pr_err(FW_BUG "Missing frame-number.\n");
1561 			of_node_put(frame_node);
1562 			goto out;
1563 		}
1564 		if (n >= ARCH_TIMER_MEM_MAX_FRAMES) {
1565 			pr_err(FW_BUG "Wrong frame-number, only 0-%u are permitted.\n",
1566 			       ARCH_TIMER_MEM_MAX_FRAMES - 1);
1567 			of_node_put(frame_node);
1568 			goto out;
1569 		}
1570 		frame = &timer_mem->frame[n];
1571 
1572 		if (frame->valid) {
1573 			pr_err(FW_BUG "Duplicated frame-number.\n");
1574 			of_node_put(frame_node);
1575 			goto out;
1576 		}
1577 
1578 		if (of_address_to_resource(frame_node, 0, &res)) {
1579 			of_node_put(frame_node);
1580 			goto out;
1581 		}
1582 		frame->cntbase = res.start;
1583 		frame->size = resource_size(&res);
1584 
1585 		frame->virt_irq = irq_of_parse_and_map(frame_node,
1586 						       ARCH_TIMER_VIRT_SPI);
1587 		frame->phys_irq = irq_of_parse_and_map(frame_node,
1588 						       ARCH_TIMER_PHYS_SPI);
1589 
1590 		frame->valid = true;
1591 	}
1592 
1593 	frame = arch_timer_mem_find_best_frame(timer_mem);
1594 	if (!frame) {
1595 		pr_err("Unable to find a suitable frame in timer @ %pa\n",
1596 			&timer_mem->cntctlbase);
1597 		ret = -EINVAL;
1598 		goto out;
1599 	}
1600 
1601 	rate = arch_timer_mem_frame_get_cntfrq(frame);
1602 	arch_timer_of_configure_rate(rate, np);
1603 
1604 	ret = arch_timer_mem_frame_register(frame);
1605 	if (!ret && !arch_timer_needs_of_probing())
1606 		ret = arch_timer_common_init();
1607 out:
1608 	kfree(timer_mem);
1609 	return ret;
1610 }
1611 TIMER_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
1612 		       arch_timer_mem_of_init);
1613 
1614 #ifdef CONFIG_ACPI_GTDT
1615 static int __init
1616 arch_timer_mem_verify_cntfrq(struct arch_timer_mem *timer_mem)
1617 {
1618 	struct arch_timer_mem_frame *frame;
1619 	u32 rate;
1620 	int i;
1621 
1622 	for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) {
1623 		frame = &timer_mem->frame[i];
1624 
1625 		if (!frame->valid)
1626 			continue;
1627 
1628 		rate = arch_timer_mem_frame_get_cntfrq(frame);
1629 		if (rate == arch_timer_rate)
1630 			continue;
1631 
1632 		pr_err(FW_BUG "CNTFRQ mismatch: frame @ %pa: (0x%08lx), CPU: (0x%08lx)\n",
1633 			&frame->cntbase,
1634 			(unsigned long)rate, (unsigned long)arch_timer_rate);
1635 
1636 		return -EINVAL;
1637 	}
1638 
1639 	return 0;
1640 }
1641 
1642 static int __init arch_timer_mem_acpi_init(int platform_timer_count)
1643 {
1644 	struct arch_timer_mem *timers, *timer;
1645 	struct arch_timer_mem_frame *frame, *best_frame = NULL;
1646 	int timer_count, i, ret = 0;
1647 
1648 	timers = kcalloc(platform_timer_count, sizeof(*timers),
1649 			    GFP_KERNEL);
1650 	if (!timers)
1651 		return -ENOMEM;
1652 
1653 	ret = acpi_arch_timer_mem_init(timers, &timer_count);
1654 	if (ret || !timer_count)
1655 		goto out;
1656 
1657 	/*
1658 	 * While unlikely, it's theoretically possible that none of the frames
1659 	 * in a timer expose the combination of feature we want.
1660 	 */
1661 	for (i = 0; i < timer_count; i++) {
1662 		timer = &timers[i];
1663 
1664 		frame = arch_timer_mem_find_best_frame(timer);
1665 		if (!best_frame)
1666 			best_frame = frame;
1667 
1668 		ret = arch_timer_mem_verify_cntfrq(timer);
1669 		if (ret) {
1670 			pr_err("Disabling MMIO timers due to CNTFRQ mismatch\n");
1671 			goto out;
1672 		}
1673 
1674 		if (!best_frame) /* implies !frame */
1675 			/*
1676 			 * Only complain about missing suitable frames if we
1677 			 * haven't already found one in a previous iteration.
1678 			 */
1679 			pr_err("Unable to find a suitable frame in timer @ %pa\n",
1680 				&timer->cntctlbase);
1681 	}
1682 
1683 	if (best_frame)
1684 		ret = arch_timer_mem_frame_register(best_frame);
1685 out:
1686 	kfree(timers);
1687 	return ret;
1688 }
1689 
1690 /* Initialize per-processor generic timer and memory-mapped timer(if present) */
1691 static int __init arch_timer_acpi_init(struct acpi_table_header *table)
1692 {
1693 	int ret, platform_timer_count;
1694 
1695 	if (arch_timers_present & ARCH_TIMER_TYPE_CP15) {
1696 		pr_warn("already initialized, skipping\n");
1697 		return -EINVAL;
1698 	}
1699 
1700 	arch_timers_present |= ARCH_TIMER_TYPE_CP15;
1701 
1702 	ret = acpi_gtdt_init(table, &platform_timer_count);
1703 	if (ret)
1704 		return ret;
1705 
1706 	arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI] =
1707 		acpi_gtdt_map_ppi(ARCH_TIMER_PHYS_NONSECURE_PPI);
1708 
1709 	arch_timer_ppi[ARCH_TIMER_VIRT_PPI] =
1710 		acpi_gtdt_map_ppi(ARCH_TIMER_VIRT_PPI);
1711 
1712 	arch_timer_ppi[ARCH_TIMER_HYP_PPI] =
1713 		acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI);
1714 
1715 	arch_timer_populate_kvm_info();
1716 
1717 	/*
1718 	 * When probing via ACPI, we have no mechanism to override the sysreg
1719 	 * CNTFRQ value. This *must* be correct.
1720 	 */
1721 	arch_timer_rate = arch_timer_get_cntfrq();
1722 	ret = validate_timer_rate();
1723 	if (ret) {
1724 		pr_err(FW_BUG "frequency not available.\n");
1725 		return ret;
1726 	}
1727 
1728 	arch_timer_uses_ppi = arch_timer_select_ppi();
1729 	if (!arch_timer_ppi[arch_timer_uses_ppi]) {
1730 		pr_err("No interrupt available, giving up\n");
1731 		return -EINVAL;
1732 	}
1733 
1734 	/* Always-on capability */
1735 	arch_timer_c3stop = acpi_gtdt_c3stop(arch_timer_uses_ppi);
1736 
1737 	/* Check for globally applicable workarounds */
1738 	arch_timer_check_ool_workaround(ate_match_acpi_oem_info, table);
1739 
1740 	ret = arch_timer_register();
1741 	if (ret)
1742 		return ret;
1743 
1744 	if (platform_timer_count &&
1745 	    arch_timer_mem_acpi_init(platform_timer_count))
1746 		pr_err("Failed to initialize memory-mapped timer.\n");
1747 
1748 	return arch_timer_common_init();
1749 }
1750 TIMER_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init);
1751 #endif
1752 
1753 int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *ts,
1754 				 struct clocksource **cs)
1755 {
1756 	struct arm_smccc_res hvc_res;
1757 	u32 ptp_counter;
1758 	ktime_t ktime;
1759 
1760 	if (!IS_ENABLED(CONFIG_HAVE_ARM_SMCCC_DISCOVERY))
1761 		return -EOPNOTSUPP;
1762 
1763 	if (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI)
1764 		ptp_counter = KVM_PTP_VIRT_COUNTER;
1765 	else
1766 		ptp_counter = KVM_PTP_PHYS_COUNTER;
1767 
1768 	arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID,
1769 			     ptp_counter, &hvc_res);
1770 
1771 	if ((int)(hvc_res.a0) < 0)
1772 		return -EOPNOTSUPP;
1773 
1774 	ktime = (u64)hvc_res.a0 << 32 | hvc_res.a1;
1775 	*ts = ktime_to_timespec64(ktime);
1776 	if (cycle)
1777 		*cycle = (u64)hvc_res.a2 << 32 | hvc_res.a3;
1778 	if (cs)
1779 		*cs = &clocksource_counter;
1780 
1781 	return 0;
1782 }
1783 EXPORT_SYMBOL_GPL(kvm_arch_ptp_get_crosststamp);
1784