// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Common time routines among all ppc machines.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) to merge
 * Paul Mackerras' version and mine for PReP and Pmac.
 * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net).
 * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com)
 *
 * First round of bugfixes by Gabriel Paubert (paubert@iram.es)
 * to make clock more stable (2.4.0-test5). The only thing
 * that this code assumes is that the timebases have been synchronized
 * by firmware on SMP and are never stopped (never do sleep
 * on SMP then, nap and doze are OK).
 *
 * Speeded up do_gettimeofday by getting rid of references to
 * xtime (which required locks for consistency). (mikejc@us.ibm.com)
 *
 * TODO (not necessarily in this file):
 * - improve precision and reproducibility of timebase frequency
 *   measurement at boot time.
 * - for astronomical applications: add a new function to get
 *   non ambiguous timestamps even around leap seconds. This needs
 *   a new timestamp format and a good name.
 *
 * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *             "A Kernel Model for Precision Timekeeping" by Dave Mills
 */

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
#include <linux/security.h>
#include <linux/percpu.h>
#include <linux/rtc.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/irq_work.h>
#include <linux/of_clk.h>
#include <linux/suspend.h>
#include <linux/sched/cputime.h>
#include <linux/sched/clock.h>
#include <linux/processor.h>
#include <asm/trace.h>

#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/nvram.h>
#include <asm/cache.h>
#include <asm/machdep.h>
#include <linux/uaccess.h>
#include <asm/time.h>
#include <asm/prom.h>
#include <asm/irq.h>
#include <asm/div64.h>
#include <asm/smp.h>
#include <asm/vdso_datapage.h>
#include <asm/firmware.h>
#include <asm/asm-prototypes.h>

/* powerpc clocksource/clockevent code */

#include <linux/clockchips.h>
#include <linux/timekeeper_internal.h>

static u64 timebase_read(struct clocksource *);
static struct clocksource clocksource_timebase = {
	.name         = "timebase",
	.rating       = 400,
	.flags        = CLOCK_SOURCE_IS_CONTINUOUS,
	.mask         = CLOCKSOURCE_MASK(64),
	.read         = timebase_read,
	.vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER,
};

#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
u64 decrementer_max = DECREMENTER_DEFAULT_MAX;

static int decrementer_set_next_event(unsigned long evt,
				      struct clock_event_device *dev);
static int decrementer_shutdown(struct clock_event_device *evt);

struct clock_event_device decrementer_clockevent = {
	.name			= "decrementer",
	.rating			= 200,
	.irq			= 0,
	.set_next_event		= decrementer_set_next_event,
	.set_state_oneshot_stopped = decrementer_shutdown,
	.set_state_shutdown	= decrementer_shutdown,
	.tick_resume		= decrementer_shutdown,
	.features		= CLOCK_EVT_FEAT_ONESHOT |
				  CLOCK_EVT_FEAT_C3STOP,
};
EXPORT_SYMBOL(decrementer_clockevent);

DEFINE_PER_CPU(u64, decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);

#define XSEC_PER_SEC (1024*1024)

#ifdef CONFIG_PPC64
#define SCALE_XSEC(xsec, max)	(((xsec) * max) / XSEC_PER_SEC)
#else
/* compute ((xsec << 12) * max) >> 32 */
#define SCALE_XSEC(xsec, max)	mulhwu((xsec) << 12, max)
#endif

unsigned long tb_ticks_per_jiffy;
unsigned long tb_ticks_per_usec = 100; /* sane default */
EXPORT_SYMBOL(tb_ticks_per_usec);
unsigned long tb_ticks_per_sec;
EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime_t conversions */

DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL_GPL(rtc_lock);

static u64 tb_to_ns_scale __read_mostly;
static unsigned tb_to_ns_shift __read_mostly;
static u64 boot_tb __read_mostly;

extern struct timezone sys_tz;
static long timezone_offset;

unsigned long ppc_proc_freq;
EXPORT_SYMBOL_GPL(ppc_proc_freq);
unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq);

bool tb_invalid;

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Factor for converting from cputime_t (timebase ticks) to
 * microseconds. This is stored as 0.64 fixed-point binary fraction.
 */
u64 __cputime_usec_factor;
EXPORT_SYMBOL(__cputime_usec_factor);

#ifdef CONFIG_PPC_SPLPAR
void (*dtl_consumer)(struct dtl_entry *, u64);
#endif

static void calc_cputime_factors(void)
{
	struct div_result res;

	div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
	__cputime_usec_factor = res.result_low;
}

/*
 * Read the SPURR on systems that have it, otherwise the PURR,
 * or if that doesn't exist return the timebase value passed in.
 */
static inline unsigned long read_spurr(unsigned long tb)
{
	if (cpu_has_feature(CPU_FTR_SPURR))
		return mfspr(SPRN_SPURR);
	if (cpu_has_feature(CPU_FTR_PURR))
		return mfspr(SPRN_PURR);
	return tb;
}

#ifdef CONFIG_PPC_SPLPAR

#include <asm/dtl.h>

/*
 * Scan the dispatch trace log and count up the stolen time.
 * Should be called with interrupts disabled.
 */
static u64 scan_dispatch_log(u64 stop_tb)
{
	u64 i = local_paca->dtl_ridx;
	struct dtl_entry *dtl = local_paca->dtl_curr;
	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
	struct lppaca *vpa = local_paca->lppaca_ptr;
	u64 tb_delta;
	u64 stolen = 0;
	u64 dtb;

	if (!dtl)
		return 0;

	if (i == be64_to_cpu(vpa->dtl_idx))
		return 0;
	while (i < be64_to_cpu(vpa->dtl_idx)) {
		dtb = be64_to_cpu(dtl->timebase);
		tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
			be32_to_cpu(dtl->ready_to_enqueue_time);
		barrier();
		if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
			/* buffer has overflowed */
			i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
			continue;
		}
		if (dtb > stop_tb)
			break;
		if (dtl_consumer)
			dtl_consumer(dtl, i);
		stolen += tb_delta;
		++i;
		++dtl;
		if (dtl == dtl_end)
			dtl = local_paca->dispatch_log;
	}
	local_paca->dtl_ridx = i;
	local_paca->dtl_curr = dtl;
	return stolen;
}
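
/*
 * Worked example of the overflow handling above (illustrative numbers,
 * not taken from real hardware): suppose N_DISPATCH_LOG were 64 and we
 * last stopped at dtl_ridx == 10, but the hypervisor has since pushed
 * vpa->dtl_idx on to 100.  Then 10 + 64 < 100, so the 26 oldest unread
 * entries (indices 10..35) have already been overwritten; we skip ahead
 * to i = 100 - 64 = 36 and restart from buffer slot 36 % 64 == 36, the
 * oldest entry still present.  Any stolen time recorded in the
 * overwritten entries is simply lost.
 */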

/*
 * Accumulate stolen time by scanning the dispatch trace log.
 * Called on entry from user mode.
 */
void notrace accumulate_stolen_time(void)
{
	u64 sst, ust;
	unsigned long save_irq_soft_mask = irq_soft_mask_return();
	struct cpu_accounting_data *acct = &local_paca->accounting;

	/* We are called early in the exception entry, before
	 * soft/hard_enabled are sync'ed to the expected state
	 * for the exception. We are hard disabled but the PACA
	 * needs to reflect that so various debug stuff doesn't
	 * complain
	 */
	irq_soft_mask_set(IRQS_DISABLED);

	sst = scan_dispatch_log(acct->starttime_user);
	ust = scan_dispatch_log(acct->starttime);
	acct->stime -= sst;
	acct->utime -= ust;
	acct->steal_time += ust + sst;

	irq_soft_mask_set(save_irq_soft_mask);
}

static inline u64 calculate_stolen_time(u64 stop_tb)
{
	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
		return 0;

	if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
		return scan_dispatch_log(stop_tb);

	return 0;
}

#else /* CONFIG_PPC_SPLPAR */
static inline u64 calculate_stolen_time(u64 stop_tb)
{
	return 0;
}

#endif /* CONFIG_PPC_SPLPAR */

/*
 * Account time for a transition between system, hard irq
 * or soft irq state.
 */
static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
					unsigned long now, unsigned long stime)
{
	unsigned long stime_scaled = 0;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	unsigned long nowscaled, deltascaled;
	unsigned long utime, utime_scaled;

	nowscaled = read_spurr(now);
	deltascaled = nowscaled - acct->startspurr;
	acct->startspurr = nowscaled;
	utime = acct->utime - acct->utime_sspurr;
	acct->utime_sspurr = acct->utime;

	/*
	 * Because we don't read the SPURR on every kernel entry/exit,
	 * deltascaled includes both user and system SPURR ticks.
	 * Apportion these ticks to system SPURR ticks and user
	 * SPURR ticks in the same ratio as the system time (delta)
	 * and user time (udelta) values obtained from the timebase
	 * over the same interval. The system ticks get accounted here;
	 * the user ticks get saved up in paca->user_time_scaled to be
	 * used by account_process_tick.
	 */
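	/*
	 * Purely illustrative numbers: if over this interval the timebase
	 * shows stime = 30 ticks of system time and utime = 70 ticks of
	 * user time, while deltascaled (SPURR) only advanced by 50 because
	 * the thread ran throttled, then below we get
	 * stime_scaled = 50 * 30 / 100 = 15 and utime_scaled = 35,
	 * preserving the 30:70 split within the 50 scaled ticks.
	 */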
	stime_scaled = stime;
	utime_scaled = utime;
	if (deltascaled != stime + utime) {
		if (utime) {
			stime_scaled = deltascaled * stime / (stime + utime);
			utime_scaled = deltascaled - stime_scaled;
		} else {
			stime_scaled = deltascaled;
		}
	}
	acct->utime_scaled += utime_scaled;
#endif

	return stime_scaled;
}

static unsigned long vtime_delta(struct cpu_accounting_data *acct,
				 unsigned long *stime_scaled,
				 unsigned long *steal_time)
{
	unsigned long now, stime;

	WARN_ON_ONCE(!irqs_disabled());

	now = mftb();
	stime = now - acct->starttime;
	acct->starttime = now;

	*stime_scaled = vtime_delta_scaled(acct, now, stime);

	*steal_time = calculate_stolen_time(now);

	return stime;
}

static void vtime_delta_kernel(struct cpu_accounting_data *acct,
			       unsigned long *stime, unsigned long *stime_scaled)
{
	unsigned long steal_time;

	*stime = vtime_delta(acct, stime_scaled, &steal_time);
	*stime -= min(*stime, steal_time);
	acct->steal_time += steal_time;
}

void vtime_account_kernel(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);
	unsigned long stime, stime_scaled;

	vtime_delta_kernel(acct, &stime, &stime_scaled);

	if (tsk->flags & PF_VCPU) {
		acct->gtime += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
		acct->utime_scaled += stime_scaled;
#endif
	} else {
		acct->stime += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
		acct->stime_scaled += stime_scaled;
#endif
	}
}
EXPORT_SYMBOL_GPL(vtime_account_kernel);

void vtime_account_idle(struct task_struct *tsk)
{
	unsigned long stime, stime_scaled, steal_time;
	struct cpu_accounting_data *acct = get_accounting(tsk);

	stime = vtime_delta(acct, &stime_scaled, &steal_time);
	acct->idle_time += stime + steal_time;
}

static void vtime_account_irq_field(struct cpu_accounting_data *acct,
				    unsigned long *field)
{
	unsigned long stime, stime_scaled;

	vtime_delta_kernel(acct, &stime, &stime_scaled);
	*field += stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	acct->stime_scaled += stime_scaled;
#endif
}

void vtime_account_softirq(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);
	vtime_account_irq_field(acct, &acct->softirq_time);
}

void vtime_account_hardirq(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);
	vtime_account_irq_field(acct, &acct->hardirq_time);
}

static void vtime_flush_scaled(struct task_struct *tsk,
			       struct cpu_accounting_data *acct)
{
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	if (acct->utime_scaled)
		tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
	if (acct->stime_scaled)
		tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);

	acct->utime_scaled = 0;
	acct->utime_sspurr = 0;
	acct->stime_scaled = 0;
#endif
}

/*
 * Account the whole cputime accumulated in the paca
 * Must be called with interrupts disabled.
 * Assumes that vtime_account_kernel/idle() has been called
 * recently (i.e. since the last entry from usermode) so that
 * get_paca()->user_time_scaled is up to date.
 */
void vtime_flush(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);

	if (acct->utime)
		account_user_time(tsk, cputime_to_nsecs(acct->utime));

	if (acct->gtime)
		account_guest_time(tsk, cputime_to_nsecs(acct->gtime));

	if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) {
		account_steal_time(cputime_to_nsecs(acct->steal_time));
		acct->steal_time = 0;
	}

	if (acct->idle_time)
		account_idle_time(cputime_to_nsecs(acct->idle_time));

	if (acct->stime)
		account_system_index_time(tsk, cputime_to_nsecs(acct->stime),
					  CPUTIME_SYSTEM);

	if (acct->hardirq_time)
		account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time),
					  CPUTIME_IRQ);
	if (acct->softirq_time)
		account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time),
					  CPUTIME_SOFTIRQ);

	vtime_flush_scaled(tsk, acct);

	acct->utime = 0;
	acct->gtime = 0;
	acct->idle_time = 0;
	acct->stime = 0;
	acct->hardirq_time = 0;
	acct->softirq_time = 0;
}

#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#define calc_cputime_factors()
#endif

void __delay(unsigned long loops)
{
	unsigned long start;

	spin_begin();
	if (tb_invalid) {
		/*
		 * TB is in error state and isn't ticking anymore.
		 * HMI handler was unable to recover from TB error.
		 * Return immediately, so that kernel won't get stuck here.
		 */
		spin_cpu_relax();
	} else {
		start = mftb();
		while (mftb() - start < loops)
			spin_cpu_relax();
	}
	spin_end();
}
EXPORT_SYMBOL(__delay);

void udelay(unsigned long usecs)
{
	__delay(tb_ticks_per_usec * usecs);
}
EXPORT_SYMBOL(udelay);

#ifdef CONFIG_SMP
unsigned long profile_pc(struct pt_regs *regs)
{
	unsigned long pc = instruction_pointer(regs);

	if (in_lock_functions(pc))
		return regs->link;

	return pc;
}
EXPORT_SYMBOL(profile_pc);
#endif

#ifdef CONFIG_IRQ_WORK

/*
 * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
 */
#ifdef CONFIG_PPC64
static inline unsigned long test_irq_work_pending(void)
{
	unsigned long x;

	/* Read paca->irq_work_pending through r13, the PACA pointer. */
	asm volatile("lbz %0,%1(13)"
		: "=r" (x)
		: "i" (offsetof(struct paca_struct, irq_work_pending)));
	return x;
}

static inline void set_irq_work_pending_flag(void)
{
	asm volatile("stb %0,%1(13)" : :
		"r" (1),
		"i" (offsetof(struct paca_struct, irq_work_pending)));
}

static inline void clear_irq_work_pending(void)
{
	asm volatile("stb %0,%1(13)" : :
		"r" (0),
		"i" (offsetof(struct paca_struct, irq_work_pending)));
}

#else /* 32-bit */

DEFINE_PER_CPU(u8, irq_work_pending);

#define set_irq_work_pending_flag()	__this_cpu_write(irq_work_pending, 1)
#define test_irq_work_pending()		__this_cpu_read(irq_work_pending)
#define clear_irq_work_pending()	__this_cpu_write(irq_work_pending, 0)

#endif /* 32 vs 64 bit */

void arch_irq_work_raise(void)
{
	/*
	 * 64-bit code that uses irq soft-mask can just cause an immediate
	 * interrupt here that gets soft masked, if this is called under
	 * local_irq_disable(). It might be possible to prevent that happening
	 * by noticing interrupts are disabled and setting decrementer pending
	 * to be replayed when irqs are enabled. The problem there is that
	 * tracing can call irq_work_raise, including in code that does low
	 * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on)
	 * which could get tangled up if we're messing with the same state
	 * here.
	 */
	preempt_disable();
	set_irq_work_pending_flag();
	set_dec(1);
	preempt_enable();
}

#else /* CONFIG_IRQ_WORK */

#define test_irq_work_pending()	0
#define clear_irq_work_pending()

#endif /* CONFIG_IRQ_WORK */

/*
 * timer_interrupt - gets called when the decrementer overflows,
 * with interrupts disabled.
 */
DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
{
	struct clock_event_device *evt = this_cpu_ptr(&decrementers);
	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
	struct pt_regs *old_regs;
	u64 now;

	/*
	 * Some implementations of hotplug will get timer interrupts while
	 * offline, just ignore these.
	 */
	if (unlikely(!cpu_online(smp_processor_id()))) {
		set_dec(decrementer_max);
		return;
	}

	/* Ensure a positive value is written to the decrementer, or else
	 * some CPUs will continue to take decrementer exceptions. When the
	 * PPC_WATCHDOG (decrementer based) is configured, keep this at most
	 * 31 bits, which is about 4 seconds on most systems, which gives
	 * the watchdog a chance of catching timer interrupt hard lockups.
	 */
	if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
		set_dec(0x7fffffff);
	else
		set_dec(decrementer_max);

	/* Conditionally hard-enable interrupts now that the DEC has been
	 * bumped to its maximum value
	 */
	may_hard_irq_enable();


#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
	if (atomic_read(&ppc_n_lost_interrupts) != 0)
		do_IRQ(regs);
#endif

	old_regs = set_irq_regs(regs);

	trace_timer_interrupt_entry(regs);

	if (test_irq_work_pending()) {
		clear_irq_work_pending();
		irq_work_run();
	}

	now = get_tb();
	if (now >= *next_tb) {
		*next_tb = ~(u64)0;
		if (evt->event_handler)
			evt->event_handler(evt);
		__this_cpu_inc(irq_stat.timer_irqs_event);
	} else {
		now = *next_tb - now;
		if (now <= decrementer_max)
			set_dec(now);
		/* We may have raced with new irq work */
		if (test_irq_work_pending())
			set_dec(1);
		__this_cpu_inc(irq_stat.timer_irqs_others);
	}

	trace_timer_interrupt_exit(regs);

	set_irq_regs(old_regs);
}
EXPORT_SYMBOL(timer_interrupt);

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void timer_broadcast_interrupt(void)
{
	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);

	*next_tb = ~(u64)0;
	tick_receive_broadcast();
	__this_cpu_inc(irq_stat.broadcast_irqs_event);
}
#endif

#ifdef CONFIG_SUSPEND
static void generic_suspend_disable_irqs(void)
{
	/* Disable the decrementer, so that it doesn't interfere
	 * with suspending.
	 */

	set_dec(decrementer_max);
	local_irq_disable();
	set_dec(decrementer_max);
}

static void generic_suspend_enable_irqs(void)
{
	local_irq_enable();
}

/* Overrides the weak version in kernel/power/main.c */
void arch_suspend_disable_irqs(void)
{
	if (ppc_md.suspend_disable_irqs)
		ppc_md.suspend_disable_irqs();
	generic_suspend_disable_irqs();
}

/* Overrides the weak version in kernel/power/main.c */
void arch_suspend_enable_irqs(void)
{
	generic_suspend_enable_irqs();
	if (ppc_md.suspend_enable_irqs)
		ppc_md.suspend_enable_irqs();
}
#endif

unsigned long long tb_to_ns(unsigned long long ticks)
{
	return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
}
EXPORT_SYMBOL_GPL(tb_to_ns);

/*
 * Scheduler clock - returns current time in nanosec units.
 *
 * Note: mulhdu(a, b) (multiply high double unsigned) returns
 * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
 * are 64-bit unsigned numbers.
 */
notrace unsigned long long sched_clock(void)
{
	return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
}


#ifdef CONFIG_PPC_PSERIES

/*
 * Running clock - attempts to give a view of time passing for virtualised
 * kernels.
 * Uses the VTB register if available otherwise a next best guess.
 */
unsigned long long running_clock(void)
{
	/*
	 * Don't read the VTB as a host since KVM does not switch in host
	 * timebase into the VTB when it takes a guest off the CPU, reading the
	 * VTB would result in reading 'last switched out' guest VTB.
	 *
	 * Host kernels are often compiled with CONFIG_PPC_PSERIES checked, so
	 * it would be unsafe to rely only on the #ifdef above.
	 */
	if (firmware_has_feature(FW_FEATURE_LPAR) &&
	    cpu_has_feature(CPU_FTR_ARCH_207S))
		return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;

	/*
	 * This is a next best approximation without a VTB.
	 * On a host which is running bare metal there should never be any stolen
	 * time and on a host which doesn't do any virtualisation TB *should* equal
	 * VTB so it makes no difference anyway.
	 */
	return local_clock() - kcpustat_this_cpu->cpustat[CPUTIME_STEAL];
}
#endif

static int __init get_freq(char *name, int cells, unsigned long *val)
{
	struct device_node *cpu;
	const __be32 *fp;
	int found = 0;

	/* The cpu node should have timebase and clock frequency properties */
	cpu = of_find_node_by_type(NULL, "cpu");

	if (cpu) {
		fp = of_get_property(cpu, name, NULL);
		if (fp) {
			found = 1;
			*val = of_read_ulong(fp, cells);
		}

		of_node_put(cpu);
	}

	return found;
}

static void start_cpu_decrementer(void)
{
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
	unsigned int tcr;

	/* Clear any pending timer interrupts */
	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);

	tcr = mfspr(SPRN_TCR);
	/*
	 * The watchdog may have already been enabled by u-boot. So leave
	 * TCR[WP] (Watchdog Period) alone.
	 */
	tcr &= TCR_WP_MASK;	/* Clear all bits except for TCR[WP] */
	tcr |= TCR_DIE;		/* Enable decrementer */
	mtspr(SPRN_TCR, tcr);
#endif
}
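
/*
 * generic_calibrate_decr() below pulls both frequencies out of the boot
 * CPU's device-tree node.  A purely illustrative cpu node (values made
 * up, not taken from any particular machine) might carry:
 *
 *	cpu@0 {
 *		device_type = "cpu";
 *		timebase-frequency = <512000000>;	// 1 cell, Hz
 *		clock-frequency = <3000000000>;		// 1 cell, Hz
 *	};
 *
 * The "ibm,extended-*" variants are read with 2 cells so that firmware
 * can report 64-bit values when a frequency does not fit in 32 bits.
 */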

void __init generic_calibrate_decr(void)
{
	ppc_tb_freq = DEFAULT_TB_FREQ;		/* hardcoded default */

	if (!get_freq("ibm,extended-timebase-frequency", 2, &ppc_tb_freq) &&
	    !get_freq("timebase-frequency", 1, &ppc_tb_freq)) {

		printk(KERN_ERR "WARNING: Estimating decrementer frequency "
				"(not found)\n");
	}

	ppc_proc_freq = DEFAULT_PROC_FREQ;	/* hardcoded default */

	if (!get_freq("ibm,extended-clock-frequency", 2, &ppc_proc_freq) &&
	    !get_freq("clock-frequency", 1, &ppc_proc_freq)) {

		printk(KERN_ERR "WARNING: Estimating processor frequency "
				"(not found)\n");
	}
}

int update_persistent_clock64(struct timespec64 now)
{
	struct rtc_time tm;

	if (!ppc_md.set_rtc_time)
		return -ENODEV;

	rtc_time64_to_tm(now.tv_sec + 1 + timezone_offset, &tm);

	return ppc_md.set_rtc_time(&tm);
}

static void __read_persistent_clock(struct timespec64 *ts)
{
	struct rtc_time tm;
	static int first = 1;

	ts->tv_nsec = 0;
	/* XXX this is a little fragile but will work okay in the short term */
	if (first) {
		first = 0;
		if (ppc_md.time_init)
			timezone_offset = ppc_md.time_init();

		/* get_boot_time() isn't guaranteed to be safe to call late */
		if (ppc_md.get_boot_time) {
			ts->tv_sec = ppc_md.get_boot_time() - timezone_offset;
			return;
		}
	}
	if (!ppc_md.get_rtc_time) {
		ts->tv_sec = 0;
		return;
	}
	ppc_md.get_rtc_time(&tm);

	ts->tv_sec = rtc_tm_to_time64(&tm);
}

void read_persistent_clock64(struct timespec64 *ts)
{
	__read_persistent_clock(ts);

	/* Sanitize it in case real time clock is set below EPOCH */
	if (ts->tv_sec < 0) {
		ts->tv_sec = 0;
		ts->tv_nsec = 0;
	}

}

/* clocksource code */
static notrace u64 timebase_read(struct clocksource *cs)
{
	return (u64)get_tb();
}

static void __init clocksource_init(void)
{
	struct clocksource *clock = &clocksource_timebase;

	if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
		printk(KERN_ERR "clocksource: %s is already registered\n",
		       clock->name);
		return;
	}

	printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n",
	       clock->name, clock->mult, clock->shift);
}

static int decrementer_set_next_event(unsigned long evt,
				      struct clock_event_device *dev)
{
	__this_cpu_write(decrementers_next_tb, get_tb() + evt);
	set_dec(evt);

	/* We may have raced with new irq work */
	if (test_irq_work_pending())
		set_dec(1);

	return 0;
}

static int decrementer_shutdown(struct clock_event_device *dev)
{
	decrementer_set_next_event(decrementer_max, dev);
	return 0;
}

static void register_decrementer_clockevent(int cpu)
{
	struct clock_event_device *dec = &per_cpu(decrementers, cpu);

	*dec = decrementer_clockevent;
	dec->cpumask = cpumask_of(cpu);

	clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max);

	printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
		    dec->name, dec->mult, dec->shift, cpu);

	/* Set values for KVM, see kvm_emulate_dec() */
	decrementer_clockevent.mult = dec->mult;
	decrementer_clockevent.shift = dec->shift;
}
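
/*
 * Rough numbers for the decrementer range (illustrative, assuming a
 * 512 MHz timebase, a value this code does not depend on): the default
 * 32-bit decrementer can be programmed at most
 * DECREMENTER_DEFAULT_MAX = 0x7fffffff ticks ahead, i.e. roughly
 * 4.2 seconds, which is the max_delta handed to
 * clockevents_config_and_register() above.  If the firmware advertises,
 * say, ibm,dec-bits = <56> (an assumed ISA v3.0 value, see
 * set_decrementer_max() below), decrementer_max becomes 2^55 - 1 ticks,
 * on the order of 800 days at that frequency.
 */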

static void enable_large_decrementer(void)
{
	if (!cpu_has_feature(CPU_FTR_ARCH_300))
		return;

	if (decrementer_max <= DECREMENTER_DEFAULT_MAX)
		return;

	/*
	 * If we're running as the hypervisor we need to enable the LD manually
	 * otherwise firmware should have done it for us.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE))
		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
}

static void __init set_decrementer_max(void)
{
	struct device_node *cpu;
	u32 bits = 32;

	/* Prior to ISAv3 the decrementer is always 32 bit */
	if (!cpu_has_feature(CPU_FTR_ARCH_300))
		return;

	cpu = of_find_node_by_type(NULL, "cpu");

	if (of_property_read_u32(cpu, "ibm,dec-bits", &bits) == 0) {
		if (bits > 64 || bits < 32) {
			pr_warn("time_init: firmware supplied invalid ibm,dec-bits");
			bits = 32;
		}

		/* calculate the signed maximum given this many bits */
		decrementer_max = (1ul << (bits - 1)) - 1;
	}

	of_node_put(cpu);

	pr_info("time_init: %u bit decrementer (max: %llx)\n",
		bits, decrementer_max);
}

static void __init init_decrementer_clockevent(void)
{
	register_decrementer_clockevent(smp_processor_id());
}

void secondary_cpu_time_init(void)
{
	/* Enable and test the large decrementer for this cpu */
	enable_large_decrementer();

	/* Start the decrementer on CPUs that have manual control
	 * such as BookE
	 */
	start_cpu_decrementer();

	/* FIXME: Should make unrelated change to move snapshot_timebase
	 * call here ! */
	register_decrementer_clockevent(smp_processor_id());
}

/* This function is only called on the boot processor */
void __init time_init(void)
{
	struct div_result res;
	u64 scale;
	unsigned shift;

	/* Normal PowerPC with timebase register */
	ppc_md.calibrate_decr();
	printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
	       ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
	printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
	       ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);

	tb_ticks_per_jiffy = ppc_tb_freq / HZ;
	tb_ticks_per_sec = ppc_tb_freq;
	tb_ticks_per_usec = ppc_tb_freq / 1000000;
	calc_cputime_factors();

	/*
	 * Compute scale factor for sched_clock.
	 * The calibrate_decr() function has set tb_ticks_per_sec,
	 * which is the timebase frequency.
	 * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret
	 * the 128-bit result as a 64.64 fixed-point number.
	 * We then shift that number right until it is less than 1.0,
	 * giving us the scale factor and shift count to use in
	 * sched_clock().
	 */
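	/*
	 * Illustrative run of the loop below, assuming a 512 MHz timebase
	 * (a value this code does not depend on): 10^9 * 2^64 / 512000000
	 * is 1.953125 in 64.64 form, so res.result_high == 1.  One shift
	 * brings it under 1.0, leaving tb_to_ns_scale ~= 0.9765625 * 2^64
	 * and tb_to_ns_shift == 1; sched_clock() then effectively
	 * multiplies timebase ticks by 1.953125 ns per tick.
	 */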
	div128_by_32(1000000000, 0, tb_ticks_per_sec, &res);
	scale = res.result_low;
	for (shift = 0; res.result_high != 0; ++shift) {
		scale = (scale >> 1) | (res.result_high << 63);
		res.result_high >>= 1;
	}
	tb_to_ns_scale = scale;
	tb_to_ns_shift = shift;
	/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
	boot_tb = get_tb();

	/* If platform provided a timezone (pmac), we correct the time */
	if (timezone_offset) {
		sys_tz.tz_minuteswest = -timezone_offset / 60;
		sys_tz.tz_dsttime = 0;
	}

	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;

	/* initialise and enable the large decrementer (if we have one) */
	set_decrementer_max();
	enable_large_decrementer();

	/* Start the decrementer on CPUs that have manual control
	 * such as BookE
	 */
	start_cpu_decrementer();

	/* Register the clocksource */
	clocksource_init();

	init_decrementer_clockevent();
	tick_setup_hrtimer_broadcast();

	of_clk_init(NULL);
	enable_sched_clock_irqtime();
}

/*
 * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
 * result.
 */
void div128_by_32(u64 dividend_high, u64 dividend_low,
		  unsigned divisor, struct div_result *dr)
{
	unsigned long a, b, c, d;
	unsigned long w, x, y, z;
	u64 ra, rb, rc;

	a = dividend_high >> 32;
	b = dividend_high & 0xffffffff;
	c = dividend_low >> 32;
	d = dividend_low & 0xffffffff;

	w = a / divisor;
	ra = ((u64)(a - (w * divisor)) << 32) + b;

	rb = ((u64) do_div(ra, divisor) << 32) + c;
	x = ra;

	rc = ((u64) do_div(rb, divisor) << 32) + d;
	y = rb;

	do_div(rc, divisor);
	z = rc;

	dr->result_high = ((u64)w << 32) + x;
	dr->result_low = ((u64)y << 32) + z;

}

/* We don't need to calibrate delay, we use the CPU timebase for that */
void calibrate_delay(void)
{
	/* Some generic code (such as spinlock debug) use loops_per_jiffy
	 * as the number of __delay(1) in a jiffy, so make it so
	 */
	loops_per_jiffy = tb_ticks_per_jiffy;
}

#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
{
	ppc_md.get_rtc_time(tm);
	return 0;
}

static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
{
	if (!ppc_md.set_rtc_time)
		return -EOPNOTSUPP;

	if (ppc_md.set_rtc_time(tm) < 0)
		return -EOPNOTSUPP;

	return 0;
}

static const struct rtc_class_ops rtc_generic_ops = {
	.read_time = rtc_generic_get_time,
	.set_time = rtc_generic_set_time,
};

static int __init rtc_init(void)
{
	struct platform_device *pdev;

	if (!ppc_md.get_rtc_time)
		return -ENODEV;

	pdev = platform_device_register_data(NULL, "rtc-generic", -1,
					     &rtc_generic_ops,
					     sizeof(rtc_generic_ops));

	return PTR_ERR_OR_ZERO(pdev);
}

device_initcall(rtc_init);
#endif