/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Extend a 32-bit counter to 63 bits
 *
 * Author:	Nicolas Pitre
 * Created:	December 3, 2006
 * Copyright:	MontaVista Software, Inc.
 */

#ifndef __LINUX_CNT32_TO_63_H__
#define __LINUX_CNT32_TO_63_H__

#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/byteorder.h>

/* this is used only to give gcc a clue about good code generation */
union cnt32_to_63 {
	struct {
#if defined(__LITTLE_ENDIAN)
		u32 lo, hi;
#elif defined(__BIG_ENDIAN)
		u32 hi, lo;
#endif
	};
	u64 val;
};


/**
 * cnt32_to_63 - Expand a 32-bit counter to a 63-bit counter
 * @cnt_lo: The low part of the counter
 *
 * Many hardware clock counters are only 32 bits wide and therefore have
 * a relatively short period, making wrap-arounds rather frequent.  This
 * is a problem when implementing sched_clock() for example, where a 64-bit
 * non-wrapping monotonic value is expected to be returned.
 *
 * To overcome that limitation, let's extend a 32-bit counter to 63 bits
 * in a completely lock-free fashion.  Bits 0 to 31 of the clock are provided
 * by the hardware while bits 32 to 62 are stored in memory.  The top bit in
 * memory is used to synchronize with the hardware clock half-period.  When
 * the top bits of both counters (hardware and in memory) differ, the value
 * in memory is updated, incrementing it when the hardware counter wraps
 * around.
 *
 * Because a word store in memory is atomic, the incremented value will
 * always be in sync with the top bit, indicating to any potential concurrent
 * reader whether the value in memory is up to date with regard to the
 * needed increment.  And any race in updating the value in memory is
 * harmless as the same value would simply be stored more than once.
 *
 * The restrictions for the algorithm to work properly are:
 *
 * 1) this code must be called at least once during each half period of the
 *    32-bit counter;
 *
 * 2) this code must not be preempted for a duration longer than the
 *    32-bit counter half period minus the longest period between two
 *    calls to this code;
 *
 * Those requirements ensure proper update to the state bit in memory.
 * This is usually not a problem in practice, but if it is then a kernel
 * timer should be scheduled to ensure this code is executed often enough.
 *
 * And finally:
 *
 * 3) the cnt_lo argument must be seen as a globally incrementing value,
 *    meaning that it should be a direct reference to the counter data which
 *    can be evaluated according to a specific ordering within the macro,
 *    and not the result of a previous evaluation stored in a variable.
 *
 * For example, this is wrong:
 *
 *	u32 partial = get_hw_count();
 *	u64 full = cnt32_to_63(partial);
 *	return full;
 *
 * This is fine:
 *
 *	u64 full = cnt32_to_63(get_hw_count());
 *	return full;
 *
 * Note that the top bit (bit 63) in the returned value should be considered
 * as garbage.  It is not cleared here because callers are likely to use a
 * multiplier on the returned value which can get rid of the top bit
 * implicitly by making the multiplier even, therefore saving on a runtime
 * clear-bit instruction.  Otherwise the caller must remember to clear the
 * top bit explicitly.
 */
#define cnt32_to_63(cnt_lo) \
({ \
	static u32 __m_cnt_hi; \
	union cnt32_to_63 __x; \
	__x.hi = __m_cnt_hi; \
	smp_rmb(); \
	__x.lo = (cnt_lo); \
	if (unlikely((s32)(__x.hi ^ __x.lo) < 0)) \
		__m_cnt_hi = __x.hi = (__x.hi ^ 0x80000000) + (__x.hi >> 31); \
	__x.val; \
})

#endif
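
/*
 * Usage sketch (illustrative only, not part of this header): how a
 * sched_clock()-style function might call cnt32_to_63().  The accessor
 * read_hw_counter() and the scale factor of 2 ns per tick are hypothetical
 * placeholders.  The points being shown are that the macro is applied
 * directly to the counter read (restriction 3 above) and that an even
 * multiplier discards the garbage top bit (bit 63) implicitly.
 *
 *	unsigned long long sched_clock(void)
 *	{
 *		// Pass the hardware read straight into the macro, never a
 *		// previously stored value.
 *		u64 cycles = cnt32_to_63(read_hw_counter());
 *
 *		// An even multiplier makes bit 63 overflow out of the
 *		// 64-bit result, so no explicit clear-bit is needed.
 *		return cycles * 2;
 *	}
 */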