/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>

#define gtod (&VVAR(vsyscall_gtod_data))

extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);

#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
	__attribute__((visibility("hidden")));
#endif

/*
 * Fallback paths: when the vDSO cannot service a request, issue the real
 * system call (the 64-bit vDSO directly via SYSCALL, the 32-bit compat
 * vDSO via __kernel_vsyscall).
 */
#ifndef BUILD_VDSO32

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}

#else

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
		: "memory", "edx");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
		: "memory", "edx");
	return ret;
}

#endif

#ifdef CONFIG_PARAVIRT_CLOCK
static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
	return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}

static notrace u64 vread_pvclock(int *mode)
{
	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
	u64 ret;
	u64 last;
	u32 version;

	/*
	 * Note: The kernel and hypervisor must guarantee that the CPU ID
	 * number maps 1:1 to per-CPU pvclock time info.
	 *
	 * Because the hypervisor is entirely unaware of guest userspace
	 * preemption, it cannot guarantee that per-CPU pvclock time
	 * info is updated if the underlying CPU changes or that the
	 * version is increased whenever the underlying CPU changes.
	 *
	 * On KVM, we are guaranteed that pvti updates for any vCPU are
	 * atomic as seen by *all* vCPUs.  This is an even stronger
	 * guarantee than we get with a normal seqlock.
	 *
	 * On Xen, we don't appear to have that guarantee, but Xen still
	 * supplies a valid seqlock using the version field.
	 *
	 * We only do pvclock vdso timing at all if
	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
	 * mean that all vCPUs have matching pvti and that the TSC is
	 * synced, so we can just look at vCPU 0's pvti.
	 */

	do {
		version = pvclock_read_begin(pvti);

		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
			*mode = VCLOCK_NONE;
			return 0;
		}

		ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
	} while (pvclock_read_retry(pvti, version));

	/* refer to vread_tsc() comment for rationale */
	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif

notrace static u64 vread_tsc(void)
{
	u64 ret = (u64)rdtsc_ordered();
	u64 last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

notrace static inline u64 vgetsns(int *mode)
{
	u64 v;
	cycles_t cycles;

	if (gtod->vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
	else
		return 0;
	v = (cycles - gtod->cycle_last) & gtod->mask;
	return v * gtod->mult;
}

/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static int __always_inline do_monotonic(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static void do_realtime_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->wall_time_coarse_sec;
		ts->tv_nsec = gtod->wall_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace static void do_monotonic_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->monotonic_time_coarse_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	switch (clock) {
	case CLOCK_REALTIME:
		if (do_realtime(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_MONOTONIC:
		if (do_monotonic(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_REALTIME_COARSE:
		do_realtime_coarse(ts);
		break;
	case CLOCK_MONOTONIC_COARSE:
		do_monotonic_coarse(ts);
		break;
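	/*
	 * Any other clock ID (CLOCK_BOOTTIME, CLOCK_TAI, CPU-time clocks,
	 * ...) has no fast path in this vDSO and is handed to the real
	 * clock_gettime() system call below.
	 */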
	default:
		goto fallback;
	}

	return 0;
fallback:
	return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
			return vdso_fallback_gtod(tv, tz);
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		tz->tz_minuteswest = gtod->tz_minuteswest;
		tz->tz_dsttime = gtod->tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = ACCESS_ONCE(gtod->wall_time_sec);

	if (t)
		*t = result;
	return result;
}
int time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));
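
/*
 * Usage sketch (illustrative only, not compiled into the vDSO): userspace
 * reaches the entry points above through the C library, which typically
 * dispatches clock_gettime()/gettimeofday()/time() to the vDSO when one is
 * mapped, e.g.:
 *
 *	#include <stdio.h>
 *	#include <time.h>
 *
 *	int main(void)
 *	{
 *		struct timespec ts;
 *
 *		if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
 *			printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
 *		return 0;
 *	}
 *
 * With a usable TSC (or a stable pvclock) the call above never enters the
 * kernel; otherwise do_monotonic() reports VCLOCK_NONE and the vDSO falls
 * back to the real system call.
 */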