/*
 *  linux/kernel/time/timekeeping.c
 *
 *  Kernel timekeeping code and accessor functions
 *
 *  This code was moved from linux/kernel/timer.c.
 *  Please see that file for copyright and history logs.
 *
 */

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sysdev.h>
#include <linux/clocksource.h>
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/tick.h>


/*
 * This read-write spinlock protects us from races in SMP while
 * playing with xtime and avenrun.
 */
__attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);

EXPORT_SYMBOL(xtime_lock);


/*
 * The current time
 * wall_to_monotonic is what we need to add to xtime (or xtime corrected
 * for sub jiffie times) to get to monotonic time.  Monotonic is pegged
 * at zero at system boot time, so wall_to_monotonic will be negative,
 * however, we will ALWAYS keep the tv_nsec part positive so we can use
 * the usual normalization.
 */
struct timespec xtime __attribute__ ((aligned (16)));
struct timespec wall_to_monotonic __attribute__ ((aligned (16)));

EXPORT_SYMBOL(xtime);


static struct clocksource *clock; /* pointer to current clocksource */


#ifdef CONFIG_GENERIC_TIME
/**
 * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook
 *
 * private function, must hold xtime_lock lock when being
 * called. Returns the number of nanoseconds since the
 * last call to update_wall_time() (adjusted by NTP scaling)
 */
static inline s64 __get_nsec_offset(void)
{
        cycle_t cycle_now, cycle_delta;
        s64 ns_offset;

        /* read clocksource: */
        cycle_now = clocksource_read(clock);

        /* calculate the delta since the last update_wall_time: */
        cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;

        /* convert to nanoseconds: */
        ns_offset = cyc2ns(clock, cycle_delta);

        return ns_offset;
}

/**
 * __get_realtime_clock_ts - Returns the time of day in a timespec
 * @ts:         pointer to the timespec to be set
 *
 * Returns the time of day in a timespec. Used by
 * do_gettimeofday() and get_realtime_clock_ts().
 */
static inline void __get_realtime_clock_ts(struct timespec *ts)
{
        unsigned long seq;
        s64 nsecs;

        do {
                seq = read_seqbegin(&xtime_lock);

                *ts = xtime;
                nsecs = __get_nsec_offset();

        } while (read_seqretry(&xtime_lock, seq));

        timespec_add_ns(ts, nsecs);
}
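
/*
 * Illustration of the cyc2ns() conversion used by __get_nsec_offset()
 * above: a clocksource pre-computes a mult/shift pair so that a cycle
 * delta scales to nanoseconds with one multiply and one shift.  This
 * helper is only a sketch of that arithmetic (assuming the mult and
 * shift fields of struct clocksource), not a replacement for cyc2ns().
 */
static inline s64 __example_cyc2ns(struct clocksource *cs, cycle_t cycle_delta)
{
        u64 ret = (u64)cycle_delta;

        /* ns ~= (cycles * mult) >> shift */
        ret = (ret * cs->mult) >> cs->shift;
        return (s64)ret;
}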

/**
 * getnstimeofday - Returns the time of day in a timespec
 * @ts:         pointer to the timespec to be set
 *
 * Returns the time of day in a timespec.
 */
void getnstimeofday(struct timespec *ts)
{
        __get_realtime_clock_ts(ts);
}

EXPORT_SYMBOL(getnstimeofday);

/**
 * do_gettimeofday - Returns the time of day in a timeval
 * @tv:         pointer to the timeval to be set
 *
 * NOTE: Users should be converted to using get_realtime_clock_ts()
 */
void do_gettimeofday(struct timeval *tv)
{
        struct timespec now;

        __get_realtime_clock_ts(&now);
        tv->tv_sec = now.tv_sec;
        tv->tv_usec = now.tv_nsec/1000;
}

EXPORT_SYMBOL(do_gettimeofday);
/**
 * do_settimeofday - Sets the time of day
 * @tv:         pointer to the timespec variable containing the new time
 *
 * Sets the time of day to the new time, updates NTP and notifies hrtimers
 */
int do_settimeofday(struct timespec *tv)
{
        unsigned long flags;
        time_t wtm_sec, sec = tv->tv_sec;
        long wtm_nsec, nsec = tv->tv_nsec;

        if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;

        write_seqlock_irqsave(&xtime_lock, flags);

        nsec -= __get_nsec_offset();

        wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
        wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);

        set_normalized_timespec(&xtime, sec, nsec);
        set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);

        clock->error = 0;
        ntp_clear();

        update_vsyscall(&xtime, clock);

        write_sequnlock_irqrestore(&xtime_lock, flags);

        /* signal hrtimers about time change */
        clock_was_set();

        return 0;
}

EXPORT_SYMBOL(do_settimeofday);

/**
 * change_clocksource - Swaps clocksources if a new one is available
 *
 * Accumulates current time interval and initializes new clocksource
 */
static void change_clocksource(void)
{
        struct clocksource *new;
        cycle_t now;
        u64 nsec;

        new = clocksource_get_next();

        if (clock == new)
                return;

        now = clocksource_read(new);
        nsec = __get_nsec_offset();
        timespec_add_ns(&xtime, nsec);

        clock = new;
        clock->cycle_last = now;

        clock->error = 0;
        clock->xtime_nsec = 0;
        clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);

        tick_clock_notify();

        printk(KERN_INFO "Time: %s clocksource has been installed.\n",
               clock->name);
}
#else
static inline void change_clocksource(void) { }
#endif

/**
 * timekeeping_is_continuous - check to see if timekeeping is free running
 */
int timekeeping_is_continuous(void)
{
        unsigned long seq;
        int ret;

        do {
                seq = read_seqbegin(&xtime_lock);

                ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;

        } while (read_seqretry(&xtime_lock, seq));

        return ret;
}
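
/*
 * Illustration of the wall_to_monotonic bookkeeping done by
 * do_settimeofday() above: the sum xtime + wall_to_monotonic is the
 * monotonic clock, so any step applied to xtime is applied to
 * wall_to_monotonic with the opposite sign and the sum is unchanged.
 * The helper below is only a sketch of how a hypothetical reader would
 * combine the two; real callers must also take the xtime_lock seqlock
 * and add the clocksource offset, as the functions above do.
 */
static inline void __example_get_monotonic(struct timespec *ts)
{
        set_normalized_timespec(ts,
                                xtime.tv_sec + wall_to_monotonic.tv_sec,
                                xtime.tv_nsec + wall_to_monotonic.tv_nsec);
}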

/**
 * read_persistent_clock -  Return time in seconds from the persistent clock.
 *
 * Weak dummy function for arches that do not yet support it.
 * Returns seconds from epoch using the battery backed persistent clock.
 * Returns zero if unsupported.
 *
 *  XXX - Do be sure to remove it once all arches implement it.
 */
unsigned long __attribute__((weak)) read_persistent_clock(void)
{
        return 0;
}

/*
 * timekeeping_init - Initializes the clocksource and common timekeeping values
 */
void __init timekeeping_init(void)
{
        unsigned long flags;
        unsigned long sec = read_persistent_clock();

        write_seqlock_irqsave(&xtime_lock, flags);

        ntp_clear();

        clock = clocksource_get_next();
        clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
        clock->cycle_last = clocksource_read(clock);

        xtime.tv_sec = sec;
        xtime.tv_nsec = 0;
        set_normalized_timespec(&wall_to_monotonic,
                -xtime.tv_sec, -xtime.tv_nsec);

        write_sequnlock_irqrestore(&xtime_lock, flags);
}

/* flag for if timekeeping is suspended */
static int timekeeping_suspended;
/* time in seconds when suspend began */
static unsigned long timekeeping_suspend_time;

/**
 * timekeeping_resume - Resumes the generic timekeeping subsystem.
 * @dev:        unused
 *
 * This is for the generic clocksource timekeeping.
 * xtime/wall_to_monotonic/jiffies/etc are
 * still managed by arch specific suspend/resume code.
 */
static int timekeeping_resume(struct sys_device *dev)
{
        unsigned long flags;
        unsigned long now = read_persistent_clock();

        clocksource_resume();

        write_seqlock_irqsave(&xtime_lock, flags);

        if (now && (now > timekeeping_suspend_time)) {
                unsigned long sleep_length = now - timekeeping_suspend_time;

                xtime.tv_sec += sleep_length;
                wall_to_monotonic.tv_sec -= sleep_length;
        }
        /* re-base the last cycle value */
        clock->cycle_last = clocksource_read(clock);
        clock->error = 0;
        timekeeping_suspended = 0;
        write_sequnlock_irqrestore(&xtime_lock, flags);

        touch_softlockup_watchdog();

        clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);

        /* Resume hrtimers */
        hres_timers_resume();

        return 0;
}

static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
{
        unsigned long flags;

        write_seqlock_irqsave(&xtime_lock, flags);
        timekeeping_suspended = 1;
        timekeeping_suspend_time = read_persistent_clock();
        write_sequnlock_irqrestore(&xtime_lock, flags);

        clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);

        return 0;
}

/* sysfs resume/suspend bits for timekeeping */
static struct sysdev_class timekeeping_sysclass = {
        .resume         = timekeeping_resume,
        .suspend        = timekeeping_suspend,
        set_kset_name("timekeeping"),
};

static struct sys_device device_timer = {
        .id             = 0,
        .cls            = &timekeeping_sysclass,
};

static int __init timekeeping_init_device(void)
{
        int error = sysdev_class_register(&timekeeping_sysclass);
        if (!error)
                error = sysdev_register(&device_timer);
        return error;
}

device_initcall(timekeeping_init_device);
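
/*
 * Note on the units used by the adjustment code below (a rough summary,
 * assuming the usual clocksource interval setup rather than quoting it):
 * clocksource_calculate_interval() picks
 *
 *      cycle_interval ~= (NTP_INTERVAL_LENGTH << shift) / mult
 *
 * i.e. the number of clocksource cycles per NTP interval, and
 * xtime_interval = cycle_interval * mult is that same interval expressed
 * in shifted nanoseconds (nsec << shift).  clock->error accumulates, in
 * TICK_LENGTH_SHIFT fixed point, the difference between the tick length
 * NTP asks for and the shifted nanoseconds actually added per interval.
 */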

/*
 * If the error is already larger, we look ahead even further
 * to compensate for late or lost adjustments.
 */
static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
                                                 s64 *offset)
{
        s64 tick_error, i;
        u32 look_ahead, adj;
        s32 error2, mult;

        /*
         * Use the current error value to determine how much to look ahead.
         * The larger the error the slower we adjust for it to avoid problems
         * with losing too many ticks, otherwise we would overadjust and
         * produce an even larger error.  The smaller the adjustment the
         * faster we try to adjust for it, as lost ticks can do less harm
         * here.  This is tuned so that an error of about 1 msec is adjusted
         * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
         */
        error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ);
        error2 = abs(error2);
        for (look_ahead = 0; error2 > 0; look_ahead++)
                error2 >>= 2;

        /*
         * Now calculate the error in (1 << look_ahead) ticks, but first
         * remove the single look ahead already included in the error.
         */
        tick_error = current_tick_length() >>
                (TICK_LENGTH_SHIFT - clock->shift + 1);
        tick_error -= clock->xtime_interval >> 1;
        error = ((error - tick_error) >> look_ahead) + tick_error;

        /* Finally calculate the adjustment shift value. */
        i = *interval;
        mult = 1;
        if (error < 0) {
                error = -error;
                *interval = -*interval;
                *offset = -*offset;
                mult = -1;
        }
        for (adj = 0; error > i; adj++)
                error >>= 1;

        *interval <<= adj;
        *offset <<= adj;
        return mult << adj;
}

/*
 * Adjust the multiplier to reduce the error value,
 * this is optimized for the most common adjustments of -1,0,1,
 * for other values we can do a bit more work.
 */
static void clocksource_adjust(struct clocksource *clock, s64 offset)
{
        s64 error, interval = clock->cycle_interval;
        int adj;

        error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1);
        if (error > interval) {
                error >>= 2;
                if (likely(error <= interval))
                        adj = 1;
                else
                        adj = clocksource_bigadjust(error, &interval, &offset);
        } else if (error < -interval) {
                error >>= 2;
                if (likely(error >= -interval)) {
                        adj = -1;
                        interval = -interval;
                        offset = -offset;
                } else
                        adj = clocksource_bigadjust(error, &interval, &offset);
        } else
                return;

        clock->mult += adj;
        clock->xtime_interval += interval;
        clock->xtime_nsec -= offset;
        clock->error -= (interval - offset) <<
                        (TICK_LENGTH_SHIFT - clock->shift);
}
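
/*
 * Why clocksource_adjust() above touches more than just mult (a sketch
 * of the reasoning, not additional code): since xtime_interval is
 * cycle_interval * mult, bumping mult by adj changes the shifted
 * nanoseconds accumulated per interval by cycle_interval * adj.  That is
 * why the (possibly sign-flipped and scaled) interval is folded into
 * xtime_interval and the anticipated correction is subtracted from
 * clock->error immediately, instead of waiting for it to show up over
 * the following intervals.
 */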

/**
 * update_wall_time - Uses the current clocksource to increment the wall time
 *
 * Called from the timer interrupt, must hold a write on xtime_lock.
 */
void update_wall_time(void)
{
        cycle_t offset;

        /* Make sure we're fully resumed: */
        if (unlikely(timekeeping_suspended))
                return;

#ifdef CONFIG_GENERIC_TIME
        offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
#else
        offset = clock->cycle_interval;
#endif
        clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;

        /* normally this loop will run just once, however in the
         * case of lost or late ticks, it will accumulate correctly.
         */
        while (offset >= clock->cycle_interval) {
                /* accumulate one interval */
                clock->xtime_nsec += clock->xtime_interval;
                clock->cycle_last += clock->cycle_interval;
                offset -= clock->cycle_interval;

                if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
                        clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
                        xtime.tv_sec++;
                        second_overflow();
                }

                /* interpolator bits */
                time_interpolator_update(clock->xtime_interval
                                                >> clock->shift);

                /* accumulate error between NTP and clock interval */
                clock->error += current_tick_length();
                clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift);
        }

        /* correct the clock when NTP error is too big */
        clocksource_adjust(clock, offset);

        /* store full nanoseconds into xtime */
        xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift;
        clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;

        /* check to see if there is a new clocksource to use */
        change_clocksource();
        update_vsyscall(&xtime, clock);
}
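
/*
 * Usage sketch (hypothetical helper, for illustration only): how code
 * elsewhere in the kernel typically consumes the interfaces exported
 * above.  Nothing calls this function; it only shows the calling
 * conventions of the accessors defined in this file.
 */
static inline void __example_timekeeping_usage(void)
{
        struct timespec ts;
        struct timeval tv;

        getnstimeofday(&ts);            /* wall time, nanosecond resolution */
        do_gettimeofday(&tv);           /* wall time, microsecond resolution */

        if (timekeeping_is_continuous()) {
                /* clocksource is valid for high resolution timers */
        }
}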