160fe637bSDr. David Alan Gilbert /*
260fe637bSDr. David Alan Gilbert * Xor Based Zero Run Length Encoding
360fe637bSDr. David Alan Gilbert *
460fe637bSDr. David Alan Gilbert * Copyright 2013 Red Hat, Inc. and/or its affiliates
560fe637bSDr. David Alan Gilbert *
660fe637bSDr. David Alan Gilbert * Authors:
760fe637bSDr. David Alan Gilbert * Orit Wasserman <owasserm@redhat.com>
860fe637bSDr. David Alan Gilbert *
960fe637bSDr. David Alan Gilbert * This work is licensed under the terms of the GNU GPL, version 2 or later.
1060fe637bSDr. David Alan Gilbert * See the COPYING file in the top-level directory.
1160fe637bSDr. David Alan Gilbert *
1260fe637bSDr. David Alan Gilbert */
131393a485SPeter Maydell #include "qemu/osdep.h"
14f348b6d1SVeronia Bahaa #include "qemu/cutils.h"
15d84a78d1SMatheus Tavares Bernardino #include "qemu/host-utils.h"
16709e3fe8SJuan Quintela #include "xbzrle.h"
1760fe637bSDr. David Alan Gilbert
181b48d0abSRichard Henderson #if defined(CONFIG_AVX512BW_OPT)
191b48d0abSRichard Henderson #include <immintrin.h>
20*7ba7db9fSRichard Henderson #include "host/cpuinfo.h"
211b48d0abSRichard Henderson
22*7ba7db9fSRichard Henderson static int __attribute__((target("avx512bw")))
xbzrle_encode_buffer_avx512(uint8_t * old_buf,uint8_t * new_buf,int slen,uint8_t * dst,int dlen)231b48d0abSRichard Henderson xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
241b48d0abSRichard Henderson uint8_t *dst, int dlen)
251b48d0abSRichard Henderson {
261b48d0abSRichard Henderson uint32_t zrun_len = 0, nzrun_len = 0;
271b48d0abSRichard Henderson int d = 0, i = 0, num = 0;
281b48d0abSRichard Henderson uint8_t *nzrun_start = NULL;
291b48d0abSRichard Henderson /* add 1 to include residual part in main loop */
301b48d0abSRichard Henderson uint32_t count512s = (slen >> 6) + 1;
311b48d0abSRichard Henderson /* countResidual is tail of data, i.e., countResidual = slen % 64 */
321b48d0abSRichard Henderson uint32_t count_residual = slen & 0b111111;
331b48d0abSRichard Henderson bool never_same = true;
341b48d0abSRichard Henderson uint64_t mask_residual = 1;
351b48d0abSRichard Henderson mask_residual <<= count_residual;
361b48d0abSRichard Henderson mask_residual -= 1;
371b48d0abSRichard Henderson __m512i r = _mm512_set1_epi32(0);
381b48d0abSRichard Henderson
391b48d0abSRichard Henderson while (count512s) {
401b48d0abSRichard Henderson int bytes_to_check = 64;
411b48d0abSRichard Henderson uint64_t mask = 0xffffffffffffffff;
421b48d0abSRichard Henderson if (count512s == 1) {
431b48d0abSRichard Henderson bytes_to_check = count_residual;
441b48d0abSRichard Henderson mask = mask_residual;
451b48d0abSRichard Henderson }
461b48d0abSRichard Henderson __m512i old_data = _mm512_mask_loadu_epi8(r,
471b48d0abSRichard Henderson mask, old_buf + i);
481b48d0abSRichard Henderson __m512i new_data = _mm512_mask_loadu_epi8(r,
491b48d0abSRichard Henderson mask, new_buf + i);
501b48d0abSRichard Henderson uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
511b48d0abSRichard Henderson count512s--;
521b48d0abSRichard Henderson
531b48d0abSRichard Henderson bool is_same = (comp & 0x1);
541b48d0abSRichard Henderson while (bytes_to_check) {
551b48d0abSRichard Henderson if (d + 2 > dlen) {
561b48d0abSRichard Henderson return -1;
571b48d0abSRichard Henderson }
581b48d0abSRichard Henderson if (is_same) {
591b48d0abSRichard Henderson if (nzrun_len) {
601b48d0abSRichard Henderson d += uleb128_encode_small(dst + d, nzrun_len);
611b48d0abSRichard Henderson if (d + nzrun_len > dlen) {
621b48d0abSRichard Henderson return -1;
631b48d0abSRichard Henderson }
641b48d0abSRichard Henderson nzrun_start = new_buf + i - nzrun_len;
651b48d0abSRichard Henderson memcpy(dst + d, nzrun_start, nzrun_len);
661b48d0abSRichard Henderson d += nzrun_len;
671b48d0abSRichard Henderson nzrun_len = 0;
681b48d0abSRichard Henderson }
691b48d0abSRichard Henderson /* 64 data at a time for speed */
701b48d0abSRichard Henderson if (count512s && (comp == 0xffffffffffffffff)) {
711b48d0abSRichard Henderson i += 64;
721b48d0abSRichard Henderson zrun_len += 64;
731b48d0abSRichard Henderson break;
741b48d0abSRichard Henderson }
751b48d0abSRichard Henderson never_same = false;
761b48d0abSRichard Henderson num = ctz64(~comp);
771b48d0abSRichard Henderson num = (num < bytes_to_check) ? num : bytes_to_check;
781b48d0abSRichard Henderson zrun_len += num;
791b48d0abSRichard Henderson bytes_to_check -= num;
801b48d0abSRichard Henderson comp >>= num;
811b48d0abSRichard Henderson i += num;
821b48d0abSRichard Henderson if (bytes_to_check) {
831b48d0abSRichard Henderson /* still has different data after same data */
841b48d0abSRichard Henderson d += uleb128_encode_small(dst + d, zrun_len);
851b48d0abSRichard Henderson zrun_len = 0;
861b48d0abSRichard Henderson } else {
871b48d0abSRichard Henderson break;
881b48d0abSRichard Henderson }
891b48d0abSRichard Henderson }
901b48d0abSRichard Henderson if (never_same || zrun_len) {
911b48d0abSRichard Henderson /*
921b48d0abSRichard Henderson * never_same only acts if
931b48d0abSRichard Henderson * data begins with diff in first count512s
941b48d0abSRichard Henderson */
951b48d0abSRichard Henderson d += uleb128_encode_small(dst + d, zrun_len);
961b48d0abSRichard Henderson zrun_len = 0;
971b48d0abSRichard Henderson never_same = false;
981b48d0abSRichard Henderson }
991b48d0abSRichard Henderson /* has diff, 64 data at a time for speed */
1001b48d0abSRichard Henderson if ((bytes_to_check == 64) && (comp == 0x0)) {
1011b48d0abSRichard Henderson i += 64;
1021b48d0abSRichard Henderson nzrun_len += 64;
1031b48d0abSRichard Henderson break;
1041b48d0abSRichard Henderson }
1051b48d0abSRichard Henderson num = ctz64(comp);
1061b48d0abSRichard Henderson num = (num < bytes_to_check) ? num : bytes_to_check;
1071b48d0abSRichard Henderson nzrun_len += num;
1081b48d0abSRichard Henderson bytes_to_check -= num;
1091b48d0abSRichard Henderson comp >>= num;
1101b48d0abSRichard Henderson i += num;
1111b48d0abSRichard Henderson if (bytes_to_check) {
1121b48d0abSRichard Henderson /* mask like 111000 */
1131b48d0abSRichard Henderson d += uleb128_encode_small(dst + d, nzrun_len);
1141b48d0abSRichard Henderson /* overflow */
1151b48d0abSRichard Henderson if (d + nzrun_len > dlen) {
1161b48d0abSRichard Henderson return -1;
1171b48d0abSRichard Henderson }
1181b48d0abSRichard Henderson nzrun_start = new_buf + i - nzrun_len;
1191b48d0abSRichard Henderson memcpy(dst + d, nzrun_start, nzrun_len);
1201b48d0abSRichard Henderson d += nzrun_len;
1211b48d0abSRichard Henderson nzrun_len = 0;
1221b48d0abSRichard Henderson is_same = true;
1231b48d0abSRichard Henderson }
1241b48d0abSRichard Henderson }
1251b48d0abSRichard Henderson }
1261b48d0abSRichard Henderson
1271b48d0abSRichard Henderson if (nzrun_len != 0) {
1281b48d0abSRichard Henderson d += uleb128_encode_small(dst + d, nzrun_len);
1291b48d0abSRichard Henderson /* overflow */
1301b48d0abSRichard Henderson if (d + nzrun_len > dlen) {
1311b48d0abSRichard Henderson return -1;
1321b48d0abSRichard Henderson }
1331b48d0abSRichard Henderson nzrun_start = new_buf + i - nzrun_len;
1341b48d0abSRichard Henderson memcpy(dst + d, nzrun_start, nzrun_len);
1351b48d0abSRichard Henderson d += nzrun_len;
1361b48d0abSRichard Henderson }
1371b48d0abSRichard Henderson return d;
1381b48d0abSRichard Henderson }
139*7ba7db9fSRichard Henderson
140*7ba7db9fSRichard Henderson static int xbzrle_encode_buffer_int(uint8_t *old_buf, uint8_t *new_buf,
141*7ba7db9fSRichard Henderson int slen, uint8_t *dst, int dlen);
142*7ba7db9fSRichard Henderson
143*7ba7db9fSRichard Henderson static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int);
144*7ba7db9fSRichard Henderson
init_accel(void)145*7ba7db9fSRichard Henderson static void __attribute__((constructor)) init_accel(void)
146*7ba7db9fSRichard Henderson {
147*7ba7db9fSRichard Henderson unsigned info = cpuinfo_init();
148*7ba7db9fSRichard Henderson if (info & CPUINFO_AVX512BW) {
149*7ba7db9fSRichard Henderson accel_func = xbzrle_encode_buffer_avx512;
150*7ba7db9fSRichard Henderson } else {
151*7ba7db9fSRichard Henderson accel_func = xbzrle_encode_buffer_int;
152*7ba7db9fSRichard Henderson }
153*7ba7db9fSRichard Henderson }
154*7ba7db9fSRichard Henderson
xbzrle_encode_buffer(uint8_t * old_buf,uint8_t * new_buf,int slen,uint8_t * dst,int dlen)155*7ba7db9fSRichard Henderson int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
156*7ba7db9fSRichard Henderson uint8_t *dst, int dlen)
157*7ba7db9fSRichard Henderson {
158*7ba7db9fSRichard Henderson return accel_func(old_buf, new_buf, slen, dst, dlen);
159*7ba7db9fSRichard Henderson }
160*7ba7db9fSRichard Henderson
161*7ba7db9fSRichard Henderson #define xbzrle_encode_buffer xbzrle_encode_buffer_int
1621b48d0abSRichard Henderson #endif
1631b48d0abSRichard Henderson
16460fe637bSDr. David Alan Gilbert /*
16560fe637bSDr. David Alan Gilbert page = zrun nzrun
16660fe637bSDr. David Alan Gilbert | zrun nzrun page
16760fe637bSDr. David Alan Gilbert
16860fe637bSDr. David Alan Gilbert zrun = length
16960fe637bSDr. David Alan Gilbert
17060fe637bSDr. David Alan Gilbert nzrun = length byte...
17160fe637bSDr. David Alan Gilbert
17260fe637bSDr. David Alan Gilbert length = uleb128 encoded integer
17360fe637bSDr. David Alan Gilbert */
xbzrle_encode_buffer(uint8_t * old_buf,uint8_t * new_buf,int slen,uint8_t * dst,int dlen)17460fe637bSDr. David Alan Gilbert int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
17560fe637bSDr. David Alan Gilbert uint8_t *dst, int dlen)
17660fe637bSDr. David Alan Gilbert {
17760fe637bSDr. David Alan Gilbert uint32_t zrun_len = 0, nzrun_len = 0;
17860fe637bSDr. David Alan Gilbert int d = 0, i = 0;
17960fe637bSDr. David Alan Gilbert long res;
18060fe637bSDr. David Alan Gilbert uint8_t *nzrun_start = NULL;
18160fe637bSDr. David Alan Gilbert
18260fe637bSDr. David Alan Gilbert g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
18360fe637bSDr. David Alan Gilbert sizeof(long)));
18460fe637bSDr. David Alan Gilbert
18560fe637bSDr. David Alan Gilbert while (i < slen) {
18660fe637bSDr. David Alan Gilbert /* overflow */
18760fe637bSDr. David Alan Gilbert if (d + 2 > dlen) {
18860fe637bSDr. David Alan Gilbert return -1;
18960fe637bSDr. David Alan Gilbert }
19060fe637bSDr. David Alan Gilbert
19160fe637bSDr. David Alan Gilbert /* not aligned to sizeof(long) */
19260fe637bSDr. David Alan Gilbert res = (slen - i) % sizeof(long);
19360fe637bSDr. David Alan Gilbert while (res && old_buf[i] == new_buf[i]) {
19460fe637bSDr. David Alan Gilbert zrun_len++;
19560fe637bSDr. David Alan Gilbert i++;
19660fe637bSDr. David Alan Gilbert res--;
19760fe637bSDr. David Alan Gilbert }
19860fe637bSDr. David Alan Gilbert
19960fe637bSDr. David Alan Gilbert /* word at a time for speed */
20060fe637bSDr. David Alan Gilbert if (!res) {
20160fe637bSDr. David Alan Gilbert while (i < slen &&
20260fe637bSDr. David Alan Gilbert (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
20360fe637bSDr. David Alan Gilbert i += sizeof(long);
20460fe637bSDr. David Alan Gilbert zrun_len += sizeof(long);
20560fe637bSDr. David Alan Gilbert }
20660fe637bSDr. David Alan Gilbert
20760fe637bSDr. David Alan Gilbert /* go over the rest */
20860fe637bSDr. David Alan Gilbert while (i < slen && old_buf[i] == new_buf[i]) {
20960fe637bSDr. David Alan Gilbert zrun_len++;
21060fe637bSDr. David Alan Gilbert i++;
21160fe637bSDr. David Alan Gilbert }
21260fe637bSDr. David Alan Gilbert }
21360fe637bSDr. David Alan Gilbert
21460fe637bSDr. David Alan Gilbert /* buffer unchanged */
21560fe637bSDr. David Alan Gilbert if (zrun_len == slen) {
21660fe637bSDr. David Alan Gilbert return 0;
21760fe637bSDr. David Alan Gilbert }
21860fe637bSDr. David Alan Gilbert
21960fe637bSDr. David Alan Gilbert /* skip last zero run */
22060fe637bSDr. David Alan Gilbert if (i == slen) {
22160fe637bSDr. David Alan Gilbert return d;
22260fe637bSDr. David Alan Gilbert }
22360fe637bSDr. David Alan Gilbert
22460fe637bSDr. David Alan Gilbert d += uleb128_encode_small(dst + d, zrun_len);
22560fe637bSDr. David Alan Gilbert
22660fe637bSDr. David Alan Gilbert zrun_len = 0;
22760fe637bSDr. David Alan Gilbert nzrun_start = new_buf + i;
22860fe637bSDr. David Alan Gilbert
22960fe637bSDr. David Alan Gilbert /* overflow */
23060fe637bSDr. David Alan Gilbert if (d + 2 > dlen) {
23160fe637bSDr. David Alan Gilbert return -1;
23260fe637bSDr. David Alan Gilbert }
23360fe637bSDr. David Alan Gilbert /* not aligned to sizeof(long) */
23460fe637bSDr. David Alan Gilbert res = (slen - i) % sizeof(long);
23560fe637bSDr. David Alan Gilbert while (res && old_buf[i] != new_buf[i]) {
23660fe637bSDr. David Alan Gilbert i++;
23760fe637bSDr. David Alan Gilbert nzrun_len++;
23860fe637bSDr. David Alan Gilbert res--;
23960fe637bSDr. David Alan Gilbert }
24060fe637bSDr. David Alan Gilbert
24160fe637bSDr. David Alan Gilbert /* word at a time for speed, use of 32-bit long okay */
24260fe637bSDr. David Alan Gilbert if (!res) {
24360fe637bSDr. David Alan Gilbert /* truncation to 32-bit long okay */
24460fe637bSDr. David Alan Gilbert unsigned long mask = (unsigned long)0x0101010101010101ULL;
24560fe637bSDr. David Alan Gilbert while (i < slen) {
24660fe637bSDr. David Alan Gilbert unsigned long xor;
24760fe637bSDr. David Alan Gilbert xor = *(unsigned long *)(old_buf + i)
24860fe637bSDr. David Alan Gilbert ^ *(unsigned long *)(new_buf + i);
24960fe637bSDr. David Alan Gilbert if ((xor - mask) & ~xor & (mask << 7)) {
25060fe637bSDr. David Alan Gilbert /* found the end of an nzrun within the current long */
25160fe637bSDr. David Alan Gilbert while (old_buf[i] != new_buf[i]) {
25260fe637bSDr. David Alan Gilbert nzrun_len++;
25360fe637bSDr. David Alan Gilbert i++;
25460fe637bSDr. David Alan Gilbert }
25560fe637bSDr. David Alan Gilbert break;
25660fe637bSDr. David Alan Gilbert } else {
25760fe637bSDr. David Alan Gilbert i += sizeof(long);
25860fe637bSDr. David Alan Gilbert nzrun_len += sizeof(long);
25960fe637bSDr. David Alan Gilbert }
26060fe637bSDr. David Alan Gilbert }
26160fe637bSDr. David Alan Gilbert }
26260fe637bSDr. David Alan Gilbert
26360fe637bSDr. David Alan Gilbert d += uleb128_encode_small(dst + d, nzrun_len);
26460fe637bSDr. David Alan Gilbert /* overflow */
26560fe637bSDr. David Alan Gilbert if (d + nzrun_len > dlen) {
26660fe637bSDr. David Alan Gilbert return -1;
26760fe637bSDr. David Alan Gilbert }
26860fe637bSDr. David Alan Gilbert memcpy(dst + d, nzrun_start, nzrun_len);
26960fe637bSDr. David Alan Gilbert d += nzrun_len;
27060fe637bSDr. David Alan Gilbert nzrun_len = 0;
27160fe637bSDr. David Alan Gilbert }
27260fe637bSDr. David Alan Gilbert
27360fe637bSDr. David Alan Gilbert return d;
27460fe637bSDr. David Alan Gilbert }
27560fe637bSDr. David Alan Gilbert
xbzrle_decode_buffer(uint8_t * src,int slen,uint8_t * dst,int dlen)27660fe637bSDr. David Alan Gilbert int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
27760fe637bSDr. David Alan Gilbert {
27860fe637bSDr. David Alan Gilbert int i = 0, d = 0;
27960fe637bSDr. David Alan Gilbert int ret;
28060fe637bSDr. David Alan Gilbert uint32_t count = 0;
28160fe637bSDr. David Alan Gilbert
28260fe637bSDr. David Alan Gilbert while (i < slen) {
28360fe637bSDr. David Alan Gilbert
28460fe637bSDr. David Alan Gilbert /* zrun */
28560fe637bSDr. David Alan Gilbert if ((slen - i) < 2) {
28660fe637bSDr. David Alan Gilbert return -1;
28760fe637bSDr. David Alan Gilbert }
28860fe637bSDr. David Alan Gilbert
28960fe637bSDr. David Alan Gilbert ret = uleb128_decode_small(src + i, &count);
29060fe637bSDr. David Alan Gilbert if (ret < 0 || (i && !count)) {
29160fe637bSDr. David Alan Gilbert return -1;
29260fe637bSDr. David Alan Gilbert }
29360fe637bSDr. David Alan Gilbert i += ret;
29460fe637bSDr. David Alan Gilbert d += count;
29560fe637bSDr. David Alan Gilbert
29660fe637bSDr. David Alan Gilbert /* overflow */
29760fe637bSDr. David Alan Gilbert if (d > dlen) {
29860fe637bSDr. David Alan Gilbert return -1;
29960fe637bSDr. David Alan Gilbert }
30060fe637bSDr. David Alan Gilbert
30160fe637bSDr. David Alan Gilbert /* nzrun */
30260fe637bSDr. David Alan Gilbert if ((slen - i) < 2) {
30360fe637bSDr. David Alan Gilbert return -1;
30460fe637bSDr. David Alan Gilbert }
30560fe637bSDr. David Alan Gilbert
30660fe637bSDr. David Alan Gilbert ret = uleb128_decode_small(src + i, &count);
30760fe637bSDr. David Alan Gilbert if (ret < 0 || !count) {
30860fe637bSDr. David Alan Gilbert return -1;
30960fe637bSDr. David Alan Gilbert }
31060fe637bSDr. David Alan Gilbert i += ret;
31160fe637bSDr. David Alan Gilbert
31260fe637bSDr. David Alan Gilbert /* overflow */
31360fe637bSDr. David Alan Gilbert if (d + count > dlen || i + count > slen) {
31460fe637bSDr. David Alan Gilbert return -1;
31560fe637bSDr. David Alan Gilbert }
31660fe637bSDr. David Alan Gilbert
31760fe637bSDr. David Alan Gilbert memcpy(dst + d, src + i, count);
31860fe637bSDr. David Alan Gilbert d += count;
31960fe637bSDr. David Alan Gilbert i += count;
32060fe637bSDr. David Alan Gilbert }
32160fe637bSDr. David Alan Gilbert
32260fe637bSDr. David Alan Gilbert return d;
32360fe637bSDr. David Alan Gilbert }
324