1 /* 2 * Copyright 2006 Andi Kleen, SUSE Labs. 3 * Subject to the GNU Public License, v.2 4 * 5 * Fast user context implementation of clock_gettime, gettimeofday, and time. 6 * 7 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> 8 * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany 9 * 10 * The code should have no internal unresolved relocations. 11 * Check with readelf after changing. 12 */ 13 14 #include <uapi/linux/time.h> 15 #include <asm/vgtod.h> 16 #include <asm/hpet.h> 17 #include <asm/vvar.h> 18 #include <asm/unistd.h> 19 #include <asm/msr.h> 20 #include <asm/pvclock.h> 21 #include <linux/math64.h> 22 #include <linux/time.h> 23 #include <linux/kernel.h> 24 25 #define gtod (&VVAR(vsyscall_gtod_data)) 26 27 extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); 28 extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); 29 extern time_t __vdso_time(time_t *t); 30 31 #ifdef CONFIG_HPET_TIMER 32 extern u8 hpet_page 33 __attribute__((visibility("hidden"))); 34 35 static notrace cycle_t vread_hpet(void) 36 { 37 return *(const volatile u32 *)(&hpet_page + HPET_COUNTER); 38 } 39 #endif 40 41 #ifdef CONFIG_PARAVIRT_CLOCK 42 extern u8 pvclock_page 43 __attribute__((visibility("hidden"))); 44 #endif 45 46 #ifndef BUILD_VDSO32 47 48 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 49 { 50 long ret; 51 asm("syscall" : "=a" (ret) : 52 "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); 53 return ret; 54 } 55 56 notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) 57 { 58 long ret; 59 60 asm("syscall" : "=a" (ret) : 61 "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); 62 return ret; 63 } 64 65 66 #else 67 68 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 69 { 70 long ret; 71 72 asm( 73 "mov %%ebx, %%edx \n" 74 "mov %2, %%ebx \n" 75 "call __kernel_vsyscall \n" 76 "mov %%edx, %%ebx \n" 77 : "=a" (ret) 78 : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) 79 : "memory", "edx"); 80 return ret; 81 } 82 83 notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) 84 { 85 long ret; 86 87 asm( 88 "mov %%ebx, %%edx \n" 89 "mov %2, %%ebx \n" 90 "call __kernel_vsyscall \n" 91 "mov %%edx, %%ebx \n" 92 : "=a" (ret) 93 : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) 94 : "memory", "edx"); 95 return ret; 96 } 97 98 #endif 99 100 #ifdef CONFIG_PARAVIRT_CLOCK 101 static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) 102 { 103 return (const struct pvclock_vsyscall_time_info *)&pvclock_page; 104 } 105 106 static notrace cycle_t vread_pvclock(int *mode) 107 { 108 const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; 109 cycle_t ret; 110 u64 tsc, pvti_tsc; 111 u64 last, delta, pvti_system_time; 112 u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift; 113 114 /* 115 * Note: The kernel and hypervisor must guarantee that cpu ID 116 * number maps 1:1 to per-CPU pvclock time info. 117 * 118 * Because the hypervisor is entirely unaware of guest userspace 119 * preemption, it cannot guarantee that per-CPU pvclock time 120 * info is updated if the underlying CPU changes or that that 121 * version is increased whenever underlying CPU changes. 122 * 123 * On KVM, we are guaranteed that pvti updates for any vCPU are 124 * atomic as seen by *all* vCPUs. This is an even stronger 125 * guarantee than we get with a normal seqlock. 126 * 127 * On Xen, we don't appear to have that guarantee, but Xen still 128 * supplies a valid seqlock using the version field. 129 * 130 * We only do pvclock vdso timing at all if 131 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to 132 * mean that all vCPUs have matching pvti and that the TSC is 133 * synced, so we can just look at vCPU 0's pvti. 134 */ 135 136 do { 137 version = pvti->version; 138 139 smp_rmb(); 140 141 if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { 142 *mode = VCLOCK_NONE; 143 return 0; 144 } 145 146 tsc = rdtsc_ordered(); 147 pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; 148 pvti_tsc_shift = pvti->tsc_shift; 149 pvti_system_time = pvti->system_time; 150 pvti_tsc = pvti->tsc_timestamp; 151 152 /* Make sure that the version double-check is last. */ 153 smp_rmb(); 154 } while (unlikely((version & 1) || version != pvti->version)); 155 156 delta = tsc - pvti_tsc; 157 ret = pvti_system_time + 158 pvclock_scale_delta(delta, pvti_tsc_to_system_mul, 159 pvti_tsc_shift); 160 161 /* refer to vread_tsc() comment for rationale */ 162 last = gtod->cycle_last; 163 164 if (likely(ret >= last)) 165 return ret; 166 167 return last; 168 } 169 #endif 170 171 notrace static cycle_t vread_tsc(void) 172 { 173 cycle_t ret = (cycle_t)rdtsc_ordered(); 174 u64 last = gtod->cycle_last; 175 176 if (likely(ret >= last)) 177 return ret; 178 179 /* 180 * GCC likes to generate cmov here, but this branch is extremely 181 * predictable (it's just a funciton of time and the likely is 182 * very likely) and there's a data dependence, so force GCC 183 * to generate a branch instead. I don't barrier() because 184 * we don't actually need a barrier, and if this function 185 * ever gets inlined it will generate worse code. 186 */ 187 asm volatile (""); 188 return last; 189 } 190 191 notrace static inline u64 vgetsns(int *mode) 192 { 193 u64 v; 194 cycles_t cycles; 195 196 if (gtod->vclock_mode == VCLOCK_TSC) 197 cycles = vread_tsc(); 198 #ifdef CONFIG_HPET_TIMER 199 else if (gtod->vclock_mode == VCLOCK_HPET) 200 cycles = vread_hpet(); 201 #endif 202 #ifdef CONFIG_PARAVIRT_CLOCK 203 else if (gtod->vclock_mode == VCLOCK_PVCLOCK) 204 cycles = vread_pvclock(mode); 205 #endif 206 else 207 return 0; 208 v = (cycles - gtod->cycle_last) & gtod->mask; 209 return v * gtod->mult; 210 } 211 212 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ 213 notrace static int __always_inline do_realtime(struct timespec *ts) 214 { 215 unsigned long seq; 216 u64 ns; 217 int mode; 218 219 do { 220 seq = gtod_read_begin(gtod); 221 mode = gtod->vclock_mode; 222 ts->tv_sec = gtod->wall_time_sec; 223 ns = gtod->wall_time_snsec; 224 ns += vgetsns(&mode); 225 ns >>= gtod->shift; 226 } while (unlikely(gtod_read_retry(gtod, seq))); 227 228 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); 229 ts->tv_nsec = ns; 230 231 return mode; 232 } 233 234 notrace static int __always_inline do_monotonic(struct timespec *ts) 235 { 236 unsigned long seq; 237 u64 ns; 238 int mode; 239 240 do { 241 seq = gtod_read_begin(gtod); 242 mode = gtod->vclock_mode; 243 ts->tv_sec = gtod->monotonic_time_sec; 244 ns = gtod->monotonic_time_snsec; 245 ns += vgetsns(&mode); 246 ns >>= gtod->shift; 247 } while (unlikely(gtod_read_retry(gtod, seq))); 248 249 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); 250 ts->tv_nsec = ns; 251 252 return mode; 253 } 254 255 notrace static void do_realtime_coarse(struct timespec *ts) 256 { 257 unsigned long seq; 258 do { 259 seq = gtod_read_begin(gtod); 260 ts->tv_sec = gtod->wall_time_coarse_sec; 261 ts->tv_nsec = gtod->wall_time_coarse_nsec; 262 } while (unlikely(gtod_read_retry(gtod, seq))); 263 } 264 265 notrace static void do_monotonic_coarse(struct timespec *ts) 266 { 267 unsigned long seq; 268 do { 269 seq = gtod_read_begin(gtod); 270 ts->tv_sec = gtod->monotonic_time_coarse_sec; 271 ts->tv_nsec = gtod->monotonic_time_coarse_nsec; 272 } while (unlikely(gtod_read_retry(gtod, seq))); 273 } 274 275 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) 276 { 277 switch (clock) { 278 case CLOCK_REALTIME: 279 if (do_realtime(ts) == VCLOCK_NONE) 280 goto fallback; 281 break; 282 case CLOCK_MONOTONIC: 283 if (do_monotonic(ts) == VCLOCK_NONE) 284 goto fallback; 285 break; 286 case CLOCK_REALTIME_COARSE: 287 do_realtime_coarse(ts); 288 break; 289 case CLOCK_MONOTONIC_COARSE: 290 do_monotonic_coarse(ts); 291 break; 292 default: 293 goto fallback; 294 } 295 296 return 0; 297 fallback: 298 return vdso_fallback_gettime(clock, ts); 299 } 300 int clock_gettime(clockid_t, struct timespec *) 301 __attribute__((weak, alias("__vdso_clock_gettime"))); 302 303 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 304 { 305 if (likely(tv != NULL)) { 306 if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE)) 307 return vdso_fallback_gtod(tv, tz); 308 tv->tv_usec /= 1000; 309 } 310 if (unlikely(tz != NULL)) { 311 tz->tz_minuteswest = gtod->tz_minuteswest; 312 tz->tz_dsttime = gtod->tz_dsttime; 313 } 314 315 return 0; 316 } 317 int gettimeofday(struct timeval *, struct timezone *) 318 __attribute__((weak, alias("__vdso_gettimeofday"))); 319 320 /* 321 * This will break when the xtime seconds get inaccurate, but that is 322 * unlikely 323 */ 324 notrace time_t __vdso_time(time_t *t) 325 { 326 /* This is atomic on x86 so we don't need any locks. */ 327 time_t result = ACCESS_ONCE(gtod->wall_time_sec); 328 329 if (t) 330 *t = result; 331 return result; 332 } 333 int time(time_t *t) 334 __attribute__((weak, alias("__vdso_time"))); 335