/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>

#define gtod (&VVAR(vsyscall_gtod_data))

extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);

#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
	__attribute__((visibility("hidden")));
#endif

#ifndef BUILD_VDSO32

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}


#else

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
		: "memory", "edx");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
		: "memory", "edx");
	return ret;
}

#endif

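/*
 * Clocksource readers.  Each vread_*() helper below returns a raw cycle
 * count, read entirely in user context; vgetsns() then converts the delta
 * since gtod->cycle_last into (shifted) nanoseconds using the mult/shift
 * pair that the kernel's timekeeping code publishes in the vvar page.
 */
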
#ifdef CONFIG_PARAVIRT_CLOCK
static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
	return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}

static notrace cycle_t vread_pvclock(int *mode)
{
	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
	cycle_t ret;
	u64 tsc, pvti_tsc;
	u64 last, delta, pvti_system_time;
	u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;

	/*
	 * Note: The kernel and hypervisor must guarantee that cpu ID
	 * number maps 1:1 to per-CPU pvclock time info.
	 *
	 * Because the hypervisor is entirely unaware of guest userspace
	 * preemption, it cannot guarantee that per-CPU pvclock time
	 * info is updated if the underlying CPU changes or that the
	 * version is increased whenever the underlying CPU changes.
	 *
	 * On KVM, we are guaranteed that pvti updates for any vCPU are
	 * atomic as seen by *all* vCPUs.  This is an even stronger
	 * guarantee than we get with a normal seqlock.
	 *
	 * On Xen, we don't appear to have that guarantee, but Xen still
	 * supplies a valid seqlock using the version field.
	 *
	 * We only do pvclock vdso timing at all if
	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
	 * mean that all vCPUs have matching pvti and that the TSC is
	 * synced, so we can just look at vCPU 0's pvti.
	 */

	do {
		version = pvti->version;

		smp_rmb();

		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
			*mode = VCLOCK_NONE;
			return 0;
		}

		tsc = rdtsc_ordered();
		pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
		pvti_tsc_shift = pvti->tsc_shift;
		pvti_system_time = pvti->system_time;
		pvti_tsc = pvti->tsc_timestamp;

		/* Make sure that the version double-check is last. */
		smp_rmb();
	} while (unlikely((version & 1) || version != pvti->version));

	delta = tsc - pvti_tsc;
	ret = pvti_system_time +
		pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
				    pvti_tsc_shift);

	/* refer to vread_tsc() comment for rationale */
	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif

notrace static cycle_t vread_tsc(void)
{
	cycle_t ret = (cycle_t)rdtsc_ordered();
	u64 last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

notrace static inline u64 vgetsns(int *mode)
{
	u64 v;
	cycles_t cycles;

	if (gtod->vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
	else
		return 0;
	v = (cycles - gtod->cycle_last) & gtod->mask;
	return v * gtod->mult;
}

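/*
 * The seqcount loops below start from a base time stored as "shifted
 * nanoseconds" (wall_time_snsec/monotonic_time_snsec are scaled by
 * 2^shift), add the likewise-shifted delta from vgetsns(), and only then
 * shift the sum back down, so no fractional nanoseconds are lost in the
 * conversion.
 */
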
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static int __always_inline do_monotonic(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static void do_realtime_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->wall_time_coarse_sec;
		ts->tv_nsec = gtod->wall_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace static void do_monotonic_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->monotonic_time_coarse_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	switch (clock) {
	case CLOCK_REALTIME:
		if (do_realtime(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_MONOTONIC:
		if (do_monotonic(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_REALTIME_COARSE:
		do_realtime_coarse(ts);
		break;
	case CLOCK_MONOTONIC_COARSE:
		do_monotonic_coarse(ts);
		break;
	default:
		goto fallback;
	}

	return 0;
fallback:
	return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
			return vdso_fallback_gtod(tv, tz);
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		tz->tz_minuteswest = gtod->tz_minuteswest;
		tz->tz_dsttime = gtod->tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = ACCESS_ONCE(gtod->wall_time_sec);

	if (t)
		*t = result;
	return result;
}
int time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));
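
/*
 * Usage note: the weak aliases above are the symbols userspace sees.  A
 * libc typically looks up __vdso_clock_gettime()/__vdso_gettimeofday() in
 * the vDSO and dispatches the ordinary library calls to them, so a call
 * roughly like
 *
 *	struct timespec ts;
 *	clock_gettime(CLOCK_MONOTONIC, &ts);
 *
 * is normally serviced entirely in user context.  Only unsupported clock
 * IDs, or a clocksource with no usable vclock (VCLOCK_NONE), take the
 * vdso_fallback_*() syscall paths.
 */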