/*
 * Simple C functions to supplement the C library
 *
 * Copyright (c) 2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
2488ca8e80SRichard Henderson #include "qemu/osdep.h"
2588ca8e80SRichard Henderson #include "qemu/cutils.h"
265e33a872SRichard Henderson #include "qemu/bswap.h"
2751f4d916SRichard Henderson #include "host/cpuinfo.h"
2888ca8e80SRichard Henderson
/*
 * Signature shared by the buffer_is_zero implementations that can be
 * installed in buffer_is_zero_accel: takes a buffer pointer and a
 * length, and returns a bool.
 */
typedef bool (*biz_accel_fn)(const void *, size_t);
30cbe3d526SAlexander Monakov
/*
 * Integer implementation for buffers shorter than 256 bytes.
 *
 * Returns true when no set bit is found in buf[0..len).  The short path
 * loads 4 bytes from each end of the buffer, so len must be at least 4;
 * the caller visible in this file only reaches here with len > 3.
 */
static bool buffer_is_zero_int_lt256(const void *buf, size_t len)
{
    const uint64_t *word, *last;
    uint64_t acc;

    /*
     * For up to 8 bytes, two (possibly overlapping) unaligned 32-bit
     * loads, one from each end, cover the whole buffer.
     */
    if (unlikely(len <= 8)) {
        return !(ldl_he_p(buf) | ldl_he_p(buf + len - 4));
    }

    /* Unaligned 64-bit loads pick up the first and the last 8 bytes. */
    acc = ldq_he_p(buf) | ldq_he_p(buf + len - 8);

    /* The middle is read as 0 to 31 aligned 64-bit words. */
    last = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 8);
    for (word = QEMU_ALIGN_PTR_DOWN(buf + 8, 8); word < last; word++) {
        acc |= *word;
    }

    return acc == 0;
}
545e33a872SRichard Henderson
/*
 * Integer implementation for buffers of at least 256 bytes.
 *
 * Returns true when no set bit is found in buf[0..len).  The bounds
 * reasoning below relies on len >= 256.
 */
static bool buffer_is_zero_int_ge256(const void *buf, size_t len)
{
    /* Aligned window strictly inside the buffer. */
    const uint64_t *cur = QEMU_ALIGN_PTR_DOWN(buf + 8, 8);
    const uint64_t *end = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 8);
    uint64_t acc;

    /* Unaligned 64-bit loads handle the first and the last 8 bytes. */
    acc = ldq_he_p(buf) | ldq_he_p(buf + len - 8);

    /*
     * Fold in the seven aligned words just below 'end'; they cover
     * whatever partial block the 64-byte loop leaves at the tail.
     */
    acc |= end[-7] | end[-6] | end[-5] | end[-4]
         | end[-3] | end[-2] | end[-1];

    /*
     * Walk the middle in 64-byte blocks, testing the previously
     * accumulated value before each new block so a non-zero buffer is
     * rejected early.  With head and tail removed, end - cur >= 30
     * words, so at least three blocks are always processed.
     */
    for (;;) {
        if (acc != 0) {
            return false;
        }
        acc = cur[0] | cur[1] | cur[2] | cur[3]
            | cur[4] | cur[5] | cur[6] | cur[7];
        cur += 8;
        if (cur >= end - 7) {
            break;
        }
    }

    return acc == 0;
}
835e33a872SRichard Henderson
84*2d32a5d2SRichard Henderson #include "host/bufferiszero.c.inc"
85efad6682SRichard Henderson
/*
 * Implementation called for buffers of 256 bytes or more.
 * Assigned from accel_table by init_accel() and
 * test_buffer_is_zero_next_accel().
 */
static biz_accel_fn buffer_is_zero_accel;
/* Index into accel_table of the implementation currently installed. */
static unsigned accel_index;
88cbe3d526SAlexander Monakov
/*
 * Out-of-line zero check for a buffer of any length.
 *
 * Returns true for an empty buffer, false as soon as the sampling check
 * rejects it, and otherwise the result of a full scan: the currently
 * installed accelerated routine for len >= 256, or the small-buffer
 * integer routine below that.
 */
bool buffer_is_zero_ool(const void *buf, size_t len)
{
    /* An empty buffer is zero by definition here. */
    if (unlikely(len == 0)) {
        return true;
    }

    /* Cheap sampling first; a failure here settles the answer. */
    if (!buffer_is_zero_sample3(buf, len)) {
        return false;
    }

    /* For len <= 3 the samples already cover every byte. */
    if (unlikely(len <= 3)) {
        return true;
    }

    /* Full scan, picking the routine by size. */
    return likely(len >= 256)
         ? buffer_is_zero_accel(buf, len)
         : buffer_is_zero_int_lt256(buf, len);
}
107083d012aSRichard Henderson
buffer_is_zero_ge256(const void * buf,size_t len)108cbe3d526SAlexander Monakov bool buffer_is_zero_ge256(const void *buf, size_t len)
109cbe3d526SAlexander Monakov {
110cbe3d526SAlexander Monakov return buffer_is_zero_accel(buf, len);
1115e33a872SRichard Henderson }
112bf67aa3dSRichard Henderson
test_buffer_is_zero_next_accel(void)113bf67aa3dSRichard Henderson bool test_buffer_is_zero_next_accel(void)
114bf67aa3dSRichard Henderson {
115bf67aa3dSRichard Henderson if (accel_index != 0) {
116bf67aa3dSRichard Henderson buffer_is_zero_accel = accel_table[--accel_index];
117bf67aa3dSRichard Henderson return true;
118bf67aa3dSRichard Henderson }
119bf67aa3dSRichard Henderson return false;
120bf67aa3dSRichard Henderson }
121bf67aa3dSRichard Henderson
init_accel(void)122bf67aa3dSRichard Henderson static void __attribute__((constructor)) init_accel(void)
123bf67aa3dSRichard Henderson {
124bf67aa3dSRichard Henderson accel_index = best_accel();
125bf67aa3dSRichard Henderson buffer_is_zero_accel = accel_table[accel_index];
126bf67aa3dSRichard Henderson }
127