xref: /openbmc/qemu/util/bufferiszero.c (revision efad6682452ec85a898609c885c2721ea12585db)
/*
 * Simple C functions to supplement the C library
 *
 * Copyright (c) 2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
2488ca8e80SRichard Henderson #include "qemu/osdep.h"
2588ca8e80SRichard Henderson #include "qemu-common.h"
2688ca8e80SRichard Henderson #include "qemu/cutils.h"
275e33a872SRichard Henderson #include "qemu/bswap.h"
2888ca8e80SRichard Henderson 
2988ca8e80SRichard Henderson 
3088ca8e80SRichard Henderson /* vector definitions */
315e33a872SRichard Henderson 
/* Only declared in this file; presumably left undefined on purpose so
   that an unsupported SIZE/VECTYPE pairing fails the link -- TODO confirm.  */
extern void link_error(void);

/*
 * ACCEL_BUFFER_ZERO(NAME, SIZE, VECTYPE, NONZERO)
 *
 * Expands to a static function
 *     bool NAME(const void *buf, size_t len)
 * that walks BUF in steps of SIZE bytes, ORs together the 4 or 8 VECTYPE
 * elements that make up each step, and returns false as soon as NONZERO()
 * reports that the combined vector has a bit set.  Returns true when the
 * end of the buffer is reached without that happening.
 *
 * SIZE must equal 4 or 8 times sizeof(VECTYPE); any other ratio expands
 * to a call to link_error().  The loop body runs before LEN is examined,
 * so the generated function always reads at least SIZE bytes.
 */
#define ACCEL_BUFFER_ZERO(NAME, SIZE, VECTYPE, NONZERO)         \
static bool NAME(const void *buf, size_t len)                   \
{                                                               \
    const void *pos = buf;                                      \
    const void *const limit = buf + len;                        \
    do {                                                        \
        const VECTYPE *vec = pos;                               \
        VECTYPE acc;                                            \
        if (SIZE == sizeof(VECTYPE) * 8) {                      \
            acc = ((vec[0] | vec[1]) | (vec[2] | vec[3]))       \
                | ((vec[4] | vec[5]) | (vec[6] | vec[7]));      \
        } else if (SIZE == sizeof(VECTYPE) * 4) {               \
            acc  = vec[0] | vec[1];                             \
            acc |= vec[2] | vec[3];                             \
        } else {                                                \
            link_error();                                       \
        }                                                       \
        if (unlikely(NONZERO(acc))) {                           \
            return false;                                       \
        }                                                       \
        pos += SIZE;                                            \
    } while (pos < limit);                                      \
    return true;                                                \
}
585e33a872SRichard Henderson 
/*
 * Integer-only zero check.
 *
 * Returns true when every one of the LEN bytes at BUF is zero.  At least
 * one byte is always read, so LEN is expected to be non-zero.
 */
static bool
buffer_zero_int(const void *buf, size_t len)
{
    if (unlikely(len < 8)) {
        /* Too short for a 64-bit load: OR the bytes together one by one.  */
        const unsigned char *bytes = buf;
        unsigned char acc = 0;
        size_t i = 0;

        do {
            acc |= bytes[i++];
        } while (i < len);

        return acc == 0;
    }

    /* The first and last eight bytes are fetched with unaligned loads,
       which may overlap the aligned words scanned in between.  */
    uint64_t acc = ldq_he_p(buf);
    const uint64_t *word =
        (const uint64_t *)(((uintptr_t)buf + 8) & ~(uintptr_t)7);
    const uint64_t *stop =
        (const uint64_t *)(((uintptr_t)buf + len) & ~(uintptr_t)7);

    /* Eight aligned words per iteration.  The accumulator from the
       previous group is tested before the next group is loaded.  */
    while (word + 8 <= stop) {
        __builtin_prefetch(word + 8);
        if (acc) {
            return false;
        }
        acc = word[0] | word[1] | word[2] | word[3]
            | word[4] | word[5] | word[6] | word[7];
        word += 8;
    }

    /* Fewer than eight aligned words remain.  */
    for (; word < stop; word++) {
        acc |= *word;
    }

    /* Unaligned tail.  */
    acc |= ldq_he_p(buf + len - 8);

    return acc == 0;
}
965e33a872SRichard Henderson 
9743ff5e01SRichard Henderson #if defined(CONFIG_AVX2_OPT) || (defined(CONFIG_CPUID_H) && defined(__SSE2__))
985e33a872SRichard Henderson #include <cpuid.h>
995e33a872SRichard Henderson 
1005e33a872SRichard Henderson /* Do not use push_options pragmas unnecessarily, because clang
1015e33a872SRichard Henderson  * does not support them.
1025e33a872SRichard Henderson  */
1035e33a872SRichard Henderson #ifndef __SSE2__
1045e33a872SRichard Henderson #pragma GCC push_options
1055e33a872SRichard Henderson #pragma GCC target("sse2")
1065e33a872SRichard Henderson #endif
10788ca8e80SRichard Henderson #include <emmintrin.h>
1085e33a872SRichard Henderson #define SSE2_NONZERO(X) \
1095e33a872SRichard Henderson     (_mm_movemask_epi8(_mm_cmpeq_epi8((X), _mm_setzero_si128())) != 0xFFFF)
1105e33a872SRichard Henderson ACCEL_BUFFER_ZERO(buffer_zero_sse2, 64, __m128i, SSE2_NONZERO)
1115e33a872SRichard Henderson #ifndef __SSE2__
1125e33a872SRichard Henderson #pragma GCC pop_options
11388ca8e80SRichard Henderson #endif
11488ca8e80SRichard Henderson 
#ifdef CONFIG_AVX2_OPT
/* The AVX2 variant is always built under an explicit target pragma,
 * and only when the build configuration defines CONFIG_AVX2_OPT.
 */
#pragma GCC push_options
#pragma GCC target("avx2")
#include <immintrin.h>
/* True when any bit of the 256-bit vector X is set.  The replacement
 * list is fully parenthesized so the leading '!' cannot bind to, or be
 * split from, whatever expression surrounds a use of the macro.
 */
#define AVX2_NONZERO(X)  (!_mm256_testz_si256((X), (X)))
/* buffer_zero_avx2(): 128 bytes per step, as four __m256i vectors.  */
ACCEL_BUFFER_ZERO(buffer_zero_avx2, 128, __m256i, AVX2_NONZERO)
#pragma GCC pop_options
#endif
1235e33a872SRichard Henderson 
/* Feature bits stored in cpuid_cache.  test_buffer_is_zero_next_accel()
   clears the lowest set bit on each call, so the numeric order of these
   values is the order in which features get disabled.  */
enum {
    CACHE_AVX2 = 2,
    CACHE_AVX1 = 4,
    CACHE_SSE4 = 8,
    CACHE_SSE2 = 16
};

/* Bitmask of CACHE_* features, filled in by init_cpuid_cache().  */
static unsigned cpuid_cache;
1305e33a872SRichard Henderson 
1315e33a872SRichard Henderson static void __attribute__((constructor)) init_cpuid_cache(void)
13288ca8e80SRichard Henderson {
1335e33a872SRichard Henderson     int max = __get_cpuid_max(0, NULL);
1345e33a872SRichard Henderson     int a, b, c, d;
1355e33a872SRichard Henderson     unsigned cache = 0;
1365e33a872SRichard Henderson 
1375e33a872SRichard Henderson     if (max >= 1) {
1385e33a872SRichard Henderson         __cpuid(1, a, b, c, d);
1395e33a872SRichard Henderson         if (d & bit_SSE2) {
1405e33a872SRichard Henderson             cache |= CACHE_SSE2;
1415e33a872SRichard Henderson         }
1425e33a872SRichard Henderson #ifdef CONFIG_AVX2_OPT
1435e33a872SRichard Henderson         if (c & bit_SSE4_1) {
1445e33a872SRichard Henderson             cache |= CACHE_SSE4;
14588ca8e80SRichard Henderson         }
14688ca8e80SRichard Henderson 
1475e33a872SRichard Henderson         /* We must check that AVX is not just available, but usable.  */
1485e33a872SRichard Henderson         if ((c & bit_OSXSAVE) && (c & bit_AVX)) {
1495e33a872SRichard Henderson             __asm("xgetbv" : "=a"(a), "=d"(d) : "c"(0));
1505e33a872SRichard Henderson             if ((a & 6) == 6) {
1515e33a872SRichard Henderson                 cache |= CACHE_AVX1;
1525e33a872SRichard Henderson                 if (max >= 7) {
1535e33a872SRichard Henderson                     __cpuid_count(7, 0, a, b, c, d);
1545e33a872SRichard Henderson                     if (b & bit_AVX2) {
1555e33a872SRichard Henderson                         cache |= CACHE_AVX2;
15688ca8e80SRichard Henderson                     }
1575e33a872SRichard Henderson                 }
1585e33a872SRichard Henderson             }
1595e33a872SRichard Henderson         }
1605e33a872SRichard Henderson #endif
1615e33a872SRichard Henderson     }
1625e33a872SRichard Henderson     cpuid_cache = cache;
1635e33a872SRichard Henderson }
1645e33a872SRichard Henderson 
165*efad6682SRichard Henderson #define HAVE_NEXT_ACCEL
166*efad6682SRichard Henderson bool test_buffer_is_zero_next_accel(void)
167*efad6682SRichard Henderson {
168*efad6682SRichard Henderson     /* If no bits set, we just tested buffer_zero_int, and there
169*efad6682SRichard Henderson        are no more acceleration options to test.  */
170*efad6682SRichard Henderson     if (cpuid_cache == 0) {
171*efad6682SRichard Henderson         return false;
172*efad6682SRichard Henderson     }
173*efad6682SRichard Henderson     /* Disable the accelerator we used before and select a new one.  */
174*efad6682SRichard Henderson     cpuid_cache &= cpuid_cache - 1;
175*efad6682SRichard Henderson     return true;
176*efad6682SRichard Henderson }
177*efad6682SRichard Henderson 
1785e33a872SRichard Henderson static bool select_accel_fn(const void *buf, size_t len)
1795e33a872SRichard Henderson {
1805e33a872SRichard Henderson     uintptr_t ibuf = (uintptr_t)buf;
1815e33a872SRichard Henderson #ifdef CONFIG_AVX2_OPT
1825e33a872SRichard Henderson     if (len % 128 == 0 && ibuf % 32 == 0 && (cpuid_cache & CACHE_AVX2)) {
1835e33a872SRichard Henderson         return buffer_zero_avx2(buf, len);
1845e33a872SRichard Henderson     }
1855e33a872SRichard Henderson #endif
1865e33a872SRichard Henderson     if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE2)) {
1875e33a872SRichard Henderson         return buffer_zero_sse2(buf, len);
1885e33a872SRichard Henderson     }
1895e33a872SRichard Henderson     return buffer_zero_int(buf, len);
1905e33a872SRichard Henderson }
1915e33a872SRichard Henderson 
1925e33a872SRichard Henderson #else
1935e33a872SRichard Henderson #define select_accel_fn  buffer_zero_int
19488ca8e80SRichard Henderson #endif
19588ca8e80SRichard Henderson 
#if !defined(HAVE_NEXT_ACCEL)
/*
 * Fallback used when HAVE_NEXT_ACCEL was not defined earlier in the
 * file: there is no accelerated variant to step through, so always
 * report that nothing is left to test.
 */
bool test_buffer_is_zero_next_accel(void)
{
    const bool more_to_test = false;

    return more_to_test;
}
#endif
202*efad6682SRichard Henderson 
/*
 * Report whether the LEN bytes starting at BUF are all zero.
 *
 * A zero-length buffer counts as all zero and is never dereferenced.
 * Any other buffer is handed to select_accel_fn, which resolves to
 * buffer_zero_int directly when no vector variants are compiled in.
 */
bool buffer_is_zero(const void *buf, size_t len)
{
    return unlikely(len == 0) ? true : select_accel_fn(buf, len);
}
216