xref: /openbmc/qemu/util/bufferiszero.c (revision 5e33a8722254f99cbce6ede73adb4b735d94f58f)
/*
 * Simple C functions to supplement the C library
 *
 * Copyright (c) 2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
2488ca8e80SRichard Henderson #include "qemu/osdep.h"
2588ca8e80SRichard Henderson #include "qemu-common.h"
2688ca8e80SRichard Henderson #include "qemu/cutils.h"
27*5e33a872SRichard Henderson #include "qemu/bswap.h"
2888ca8e80SRichard Henderson 
2988ca8e80SRichard Henderson 
/* vector definitions */

/*
 * Called only from the branch of ACCEL_BUFFER_ZERO that is dead for every
 * supported SIZE / sizeof(VECTYPE) ratio.  No definition is visible in this
 * file; presumably an unsupported instantiation is meant to fail at link
 * time rather than silently misbehave.
 */
extern void link_error(void);

/*
 * ACCEL_BUFFER_ZERO(NAME, SIZE, VECTYPE, NONZERO)
 *
 * Expands to "static bool NAME(const void *buf, size_t len)", which returns
 * true when every byte of the buffer is zero.
 *
 *   SIZE     bytes consumed per loop iteration; must be 4 or 8 times
 *            sizeof(VECTYPE).
 *   VECTYPE  vector type supporting the '|' operator.
 *   NONZERO  macro/function yielding non-zero when its vector argument has
 *            any bit set.
 *
 * The loop is do/while, so at least SIZE bytes are always read, and the
 * buffer is dereferenced directly as VECTYPE.  The callers in this file
 * therefore only use the generated function when len is a non-zero multiple
 * of SIZE and buf is suitably aligned for VECTYPE.
 *
 * Note: arithmetic on "void *" is a GNU C extension.
 */
#define ACCEL_BUFFER_ZERO(NAME, SIZE, VECTYPE, NONZERO)         \
static bool NAME(const void *buf, size_t len)                   \
{                                                               \
    const void *const limit = buf + len;                        \
    do {                                                        \
        const VECTYPE *vec = buf;                               \
        VECTYPE acc;                                            \
        if (SIZE == sizeof(VECTYPE) * 8) {                      \
            acc  = vec[0] | vec[1];                             \
            acc |= vec[2] | vec[3];                             \
            acc |= vec[4] | vec[5];                             \
            acc |= vec[6] | vec[7];                             \
        } else if (SIZE == sizeof(VECTYPE) * 4) {               \
            acc = (vec[0] | vec[1]) | (vec[2] | vec[3]);        \
        } else {                                                \
            link_error();                                       \
        }                                                       \
        if (unlikely(NONZERO(acc))) {                           \
            return false;                                       \
        }                                                       \
        buf += SIZE;                                            \
    } while (buf < limit);                                      \
    return true;                                                \
}
58*5e33a872SRichard Henderson 
/*
 * Integer-only zero check, used when no vector kernel applies.
 *
 * Returns true when all LEN bytes starting at BUF are zero.  LEN must be
 * non-zero: the short path below always reads at least one byte.
 */
static bool
buffer_zero_int(const void *buf, size_t len)
{
    const unsigned char *bytes = buf;

    if (unlikely(len < 8)) {
        /* Too short for a 64-bit load: OR the individual bytes together. */
        unsigned char acc = 0;
        size_t i = 0;

        do {
            acc |= bytes[i];
        } while (++i < len);

        return !acc;
    }

    /*
     * Unaligned 64-bit loads cover the first and the last eight bytes
     * (they may overlap the middle or each other, which is harmless for
     * an OR reduction).  Everything in between is walked as naturally
     * aligned 64-bit words.
     */
    uint64_t acc = ldq_he_p(buf);
    const uint64_t *cur =
        (const uint64_t *)(((uintptr_t)buf + 8) & ~(uintptr_t)7);
    const uint64_t *stop =
        (const uint64_t *)(((uintptr_t)buf + len) & ~(uintptr_t)7);

    /* Unrolled by eight words; the test of the previous group's result is
       deferred to the start of the next iteration.  */
    while (stop - cur >= 8) {
        __builtin_prefetch(cur + 8);
        if (acc) {
            return false;
        }
        acc = (cur[0] | cur[1]) | (cur[2] | cur[3])
            | (cur[4] | cur[5]) | (cur[6] | cur[7]);
        cur += 8;
    }

    /* Fewer than eight aligned words remain. */
    for (; cur < stop; cur++) {
        acc |= *cur;
    }

    /* Trailing (possibly unaligned) eight bytes. */
    acc |= ldq_he_p(bytes + len - 8);

    return !acc;
}
96*5e33a872SRichard Henderson 
97*5e33a872SRichard Henderson #if defined(__ALTIVEC__)
9888ca8e80SRichard Henderson #include <altivec.h>
9988ca8e80SRichard Henderson /* The altivec.h header says we're allowed to undef these for
10088ca8e80SRichard Henderson  * C++ compatibility.  Here we don't care about C++, but we
10188ca8e80SRichard Henderson  * undef them anyway to avoid namespace pollution.
102*5e33a872SRichard Henderson  * altivec.h may redefine the bool macro as vector type.
103*5e33a872SRichard Henderson  * Reset it to POSIX semantics.
10488ca8e80SRichard Henderson  */
10588ca8e80SRichard Henderson #undef vector
10688ca8e80SRichard Henderson #undef pixel
10788ca8e80SRichard Henderson #undef bool
10888ca8e80SRichard Henderson #define bool _Bool
109*5e33a872SRichard Henderson #define DO_NONZERO(X)  vec_any_ne(X, (__vector unsigned char){ 0 })
110*5e33a872SRichard Henderson ACCEL_BUFFER_ZERO(buffer_zero_ppc, 128, __vector unsigned char, DO_NONZERO)
111*5e33a872SRichard Henderson 
112*5e33a872SRichard Henderson static bool select_accel_fn(const void *buf, size_t len)
113*5e33a872SRichard Henderson {
114*5e33a872SRichard Henderson     uintptr_t ibuf = (uintptr_t)buf;
115*5e33a872SRichard Henderson     if (len % 128 == 0 && ibuf % sizeof(__vector unsigned char) == 0) {
116*5e33a872SRichard Henderson         return buffer_zero_ppc(buf, len);
117*5e33a872SRichard Henderson     }
118*5e33a872SRichard Henderson     return buffer_zero_int(buf, len);
119*5e33a872SRichard Henderson }
120*5e33a872SRichard Henderson 
121*5e33a872SRichard Henderson #elif defined(CONFIG_AVX2_OPT) || (defined(CONFIG_CPUID_H) && defined(__SSE2__))
122*5e33a872SRichard Henderson #include <cpuid.h>
123*5e33a872SRichard Henderson 
124*5e33a872SRichard Henderson /* Do not use push_options pragmas unnecessarily, because clang
125*5e33a872SRichard Henderson  * does not support them.
126*5e33a872SRichard Henderson  */
127*5e33a872SRichard Henderson #ifndef __SSE2__
128*5e33a872SRichard Henderson #pragma GCC push_options
129*5e33a872SRichard Henderson #pragma GCC target("sse2")
130*5e33a872SRichard Henderson #endif
13188ca8e80SRichard Henderson #include <emmintrin.h>
132*5e33a872SRichard Henderson #define SSE2_NONZERO(X) \
133*5e33a872SRichard Henderson     (_mm_movemask_epi8(_mm_cmpeq_epi8((X), _mm_setzero_si128())) != 0xFFFF)
134*5e33a872SRichard Henderson ACCEL_BUFFER_ZERO(buffer_zero_sse2, 64, __m128i, SSE2_NONZERO)
135*5e33a872SRichard Henderson #ifndef __SSE2__
136*5e33a872SRichard Henderson #pragma GCC pop_options
13788ca8e80SRichard Henderson #endif
13888ca8e80SRichard Henderson 
#ifdef CONFIG_AVX2_OPT
/* Compile just this kernel for AVX2; its use is gated on a run-time CPUID
   check, so the rest of the file must stay buildable for the baseline.  */
#pragma GCC push_options
#pragma GCC target("avx2")
#include <immintrin.h>

/* _mm256_testz_si256(X, X) yields 1 exactly when X is all zero bits. */
#define AVX2_NONZERO(X)  (!_mm256_testz_si256((X), (X)))

/* AVX2 kernel: consumes 128 bytes per iteration. */
ACCEL_BUFFER_ZERO(buffer_zero_avx2, 128, __m256i, AVX2_NONZERO)
#pragma GCC pop_options
#endif
147*5e33a872SRichard Henderson 
/* Feature bits stored in cpuid_cache. */
#define CACHE_AVX2    0x02
#define CACHE_AVX1    0x04
#define CACHE_SSE4    0x08
#define CACHE_SSE2    0x10

/* Bitwise OR of CACHE_* flags; filled in once by init_cpuid_cache(). */
static unsigned cpuid_cache;

154*5e33a872SRichard Henderson 
155*5e33a872SRichard Henderson static void __attribute__((constructor)) init_cpuid_cache(void)
15688ca8e80SRichard Henderson {
157*5e33a872SRichard Henderson     int max = __get_cpuid_max(0, NULL);
158*5e33a872SRichard Henderson     int a, b, c, d;
159*5e33a872SRichard Henderson     unsigned cache = 0;
160*5e33a872SRichard Henderson 
161*5e33a872SRichard Henderson     if (max >= 1) {
162*5e33a872SRichard Henderson         __cpuid(1, a, b, c, d);
163*5e33a872SRichard Henderson         if (d & bit_SSE2) {
164*5e33a872SRichard Henderson             cache |= CACHE_SSE2;
165*5e33a872SRichard Henderson         }
166*5e33a872SRichard Henderson #ifdef CONFIG_AVX2_OPT
167*5e33a872SRichard Henderson         if (c & bit_SSE4_1) {
168*5e33a872SRichard Henderson             cache |= CACHE_SSE4;
16988ca8e80SRichard Henderson         }
17088ca8e80SRichard Henderson 
171*5e33a872SRichard Henderson         /* We must check that AVX is not just available, but usable.  */
172*5e33a872SRichard Henderson         if ((c & bit_OSXSAVE) && (c & bit_AVX)) {
173*5e33a872SRichard Henderson             __asm("xgetbv" : "=a"(a), "=d"(d) : "c"(0));
174*5e33a872SRichard Henderson             if ((a & 6) == 6) {
175*5e33a872SRichard Henderson                 cache |= CACHE_AVX1;
176*5e33a872SRichard Henderson                 if (max >= 7) {
177*5e33a872SRichard Henderson                     __cpuid_count(7, 0, a, b, c, d);
178*5e33a872SRichard Henderson                     if (b & bit_AVX2) {
179*5e33a872SRichard Henderson                         cache |= CACHE_AVX2;
18088ca8e80SRichard Henderson                     }
181*5e33a872SRichard Henderson                 }
182*5e33a872SRichard Henderson             }
183*5e33a872SRichard Henderson         }
184*5e33a872SRichard Henderson #endif
185*5e33a872SRichard Henderson     }
186*5e33a872SRichard Henderson     cpuid_cache = cache;
187*5e33a872SRichard Henderson }
188*5e33a872SRichard Henderson 
189*5e33a872SRichard Henderson static bool select_accel_fn(const void *buf, size_t len)
190*5e33a872SRichard Henderson {
191*5e33a872SRichard Henderson     uintptr_t ibuf = (uintptr_t)buf;
192*5e33a872SRichard Henderson #ifdef CONFIG_AVX2_OPT
193*5e33a872SRichard Henderson     if (len % 128 == 0 && ibuf % 32 == 0 && (cpuid_cache & CACHE_AVX2)) {
194*5e33a872SRichard Henderson         return buffer_zero_avx2(buf, len);
195*5e33a872SRichard Henderson     }
196*5e33a872SRichard Henderson #endif
197*5e33a872SRichard Henderson     if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE2)) {
198*5e33a872SRichard Henderson         return buffer_zero_sse2(buf, len);
199*5e33a872SRichard Henderson     }
200*5e33a872SRichard Henderson     return buffer_zero_int(buf, len);
201*5e33a872SRichard Henderson }
202*5e33a872SRichard Henderson 
203*5e33a872SRichard Henderson #elif defined(__aarch64__)
204*5e33a872SRichard Henderson #include "arm_neon.h"
205*5e33a872SRichard Henderson 
206*5e33a872SRichard Henderson #define DO_NONZERO(X)  (vgetq_lane_u64((X), 0) | vgetq_lane_u64((X), 1))
207*5e33a872SRichard Henderson ACCEL_BUFFER_ZERO(buffer_zero_neon, 128, uint64x2_t, DO_NONZERO)
208*5e33a872SRichard Henderson 
209*5e33a872SRichard Henderson static bool select_accel_fn(const void *buf, size_t len)
210*5e33a872SRichard Henderson {
211*5e33a872SRichard Henderson     uintptr_t ibuf = (uintptr_t)buf;
212*5e33a872SRichard Henderson     if (len % 128 == 0 && ibuf % sizeof(uint64x2_t) == 0) {
213*5e33a872SRichard Henderson         return buffer_zero_neon(buf, len);
214*5e33a872SRichard Henderson     }
215*5e33a872SRichard Henderson     return buffer_zero_int(buf, len);
216*5e33a872SRichard Henderson }
217*5e33a872SRichard Henderson 
218*5e33a872SRichard Henderson #else
219*5e33a872SRichard Henderson #define select_accel_fn  buffer_zero_int
22088ca8e80SRichard Henderson #endif
22188ca8e80SRichard Henderson 
/*
 * Checks if a buffer is all zeroes.
 *
 * @buf: start of the buffer to inspect
 * @len: number of bytes to inspect
 *
 * Returns true when all @len bytes are zero.  An empty buffer (@len == 0)
 * is reported as zero without touching @buf.
 */
bool buffer_is_zero(const void *buf, size_t len)
{
    /*
     * The empty case is filtered out here because the back ends always
     * read at least one element.  Otherwise hand over to the best
     * implementation for this host, which may be a vector kernel or the
     * unrolled loop over 64-bit integers.
     */
    return unlikely(len == 0) || select_accel_fn(buf, len);
}
234*5e33a872SRichard Henderson }
235