/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <asm/mshyperv.h>
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>

#define gtod (&VVAR(vsyscall_gtod_data))

extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);

#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
	__attribute__((visibility("hidden")));
#endif

#ifdef CONFIG_HYPERV_TSCPAGE
extern u8 hvclock_page
	__attribute__((visibility("hidden")));
#endif

#ifndef BUILD_VDSO32

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}


#else

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
		: "memory", "edx");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
		: "memory", "edx");
	return ret;
}

#endif
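
/*
 * The vread_*() helpers below read the raw cycle counter for the clock
 * source currently selected in gtod->vclock_mode.  Each one either
 * returns a cycle count or sets *mode = VCLOCK_NONE, which makes the
 * high-level functions further down take the syscall fallback paths
 * defined above.
 */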

#ifdef CONFIG_PARAVIRT_CLOCK
static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
	return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}

static notrace u64 vread_pvclock(int *mode)
{
	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
	u64 ret;
	u64 last;
	u32 version;

	/*
	 * Note: The kernel and hypervisor must guarantee that cpu ID
	 * number maps 1:1 to per-CPU pvclock time info.
	 *
	 * Because the hypervisor is entirely unaware of guest userspace
	 * preemption, it cannot guarantee that per-CPU pvclock time
	 * info is updated if the underlying CPU changes or that that
	 * version is increased whenever underlying CPU changes.
	 *
	 * On KVM, we are guaranteed that pvti updates for any vCPU are
	 * atomic as seen by *all* vCPUs.  This is an even stronger
	 * guarantee than we get with a normal seqlock.
	 *
	 * On Xen, we don't appear to have that guarantee, but Xen still
	 * supplies a valid seqlock using the version field.
	 *
	 * We only do pvclock vdso timing at all if
	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
	 * mean that all vCPUs have matching pvti and that the TSC is
	 * synced, so we can just look at vCPU 0's pvti.
	 */

	do {
		version = pvclock_read_begin(pvti);

		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
			*mode = VCLOCK_NONE;
			return 0;
		}

		ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
	} while (pvclock_read_retry(pvti, version));

	/* refer to vread_tsc() comment for rationale */
	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
static notrace u64 vread_hvclock(int *mode)
{
	const struct ms_hyperv_tsc_page *tsc_pg =
		(const struct ms_hyperv_tsc_page *)&hvclock_page;
	u64 current_tick = hv_read_tsc_page(tsc_pg);

	if (current_tick != U64_MAX)
		return current_tick;

	*mode = VCLOCK_NONE;
	return 0;
}
#endif

notrace static u64 vread_tsc(void)
{
	u64 ret = (u64)rdtsc_ordered();
	u64 last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

notrace static inline u64 vgetsns(int *mode)
{
	u64 v;
	cycles_t cycles;

	if (gtod->vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
		cycles = vread_hvclock(mode);
#endif
	else
		return 0;
	v = (cycles - gtod->cycle_last) & gtod->mask;
	return v * gtod->mult;
}
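
/*
 * vgetsns() returns shifted nanoseconds; the full conversion done by
 * do_realtime()/do_monotonic() below is essentially
 *
 *	ns = base_snsec + ((cycles - cycle_last) & mask) * mult;
 *	ns >>= shift;
 *
 * where base_snsec is the wall/monotonic nanosecond remainder that the
 * timekeeping core keeps pre-shifted by gtod->shift.
 */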

/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static int __always_inline do_monotonic(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static void do_realtime_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->wall_time_coarse_sec;
		ts->tv_nsec = gtod->wall_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace static void do_monotonic_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->monotonic_time_coarse_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	switch (clock) {
	case CLOCK_REALTIME:
		if (do_realtime(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_MONOTONIC:
		if (do_monotonic(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_REALTIME_COARSE:
		do_realtime_coarse(ts);
		break;
	case CLOCK_MONOTONIC_COARSE:
		do_monotonic_coarse(ts);
		break;
	default:
		goto fallback;
	}

	return 0;
fallback:
	return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
			return vdso_fallback_gtod(tv, tz);
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		tz->tz_minuteswest = gtod->tz_minuteswest;
		tz->tz_dsttime = gtod->tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = READ_ONCE(gtod->wall_time_sec);

	if (t)
		*t = result;
	return result;
}
time_t time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));
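
/*
 * Illustrative userspace view (not part of this file): a C library that
 * knows about the vDSO resolves clock_gettime()/gettimeofday()/time() to
 * the __vdso_* entry points above, so an ordinary call such as
 *
 *	struct timespec ts;
 *	clock_gettime(CLOCK_MONOTONIC, &ts);
 *
 * normally completes entirely in user context; only clock IDs or vclock
 * modes not handled here end up in the syscall fallback paths.
 */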