xref: /openbmc/qemu/migration/xbzrle.c (revision 1c12355b31046a6b35a4f50c85c4f01afb1bd728)
160fe637bSDr. David Alan Gilbert /*
260fe637bSDr. David Alan Gilbert  * Xor Based Zero Run Length Encoding
360fe637bSDr. David Alan Gilbert  *
460fe637bSDr. David Alan Gilbert  * Copyright 2013 Red Hat, Inc. and/or its affiliates
560fe637bSDr. David Alan Gilbert  *
660fe637bSDr. David Alan Gilbert  * Authors:
760fe637bSDr. David Alan Gilbert  *  Orit Wasserman  <owasserm@redhat.com>
860fe637bSDr. David Alan Gilbert  *
960fe637bSDr. David Alan Gilbert  * This work is licensed under the terms of the GNU GPL, version 2 or later.
1060fe637bSDr. David Alan Gilbert  * See the COPYING file in the top-level directory.
1160fe637bSDr. David Alan Gilbert  *
1260fe637bSDr. David Alan Gilbert  */
131393a485SPeter Maydell #include "qemu/osdep.h"
14f348b6d1SVeronia Bahaa #include "qemu/cutils.h"
15d84a78d1SMatheus Tavares Bernardino #include "qemu/host-utils.h"
16709e3fe8SJuan Quintela #include "xbzrle.h"
1760fe637bSDr. David Alan Gilbert 
181b48d0abSRichard Henderson #if defined(CONFIG_AVX512BW_OPT)
191b48d0abSRichard Henderson #include <immintrin.h>
20*7ba7db9fSRichard Henderson #include "host/cpuinfo.h"
211b48d0abSRichard Henderson 
22*7ba7db9fSRichard Henderson static int __attribute__((target("avx512bw")))
xbzrle_encode_buffer_avx512(uint8_t * old_buf,uint8_t * new_buf,int slen,uint8_t * dst,int dlen)231b48d0abSRichard Henderson xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
241b48d0abSRichard Henderson                             uint8_t *dst, int dlen)
251b48d0abSRichard Henderson {
261b48d0abSRichard Henderson     uint32_t zrun_len = 0, nzrun_len = 0;
271b48d0abSRichard Henderson     int d = 0, i = 0, num = 0;
281b48d0abSRichard Henderson     uint8_t *nzrun_start = NULL;
291b48d0abSRichard Henderson     /* add 1 to include residual part in main loop */
301b48d0abSRichard Henderson     uint32_t count512s = (slen >> 6) + 1;
311b48d0abSRichard Henderson     /* countResidual is tail of data, i.e., countResidual = slen % 64 */
321b48d0abSRichard Henderson     uint32_t count_residual = slen & 0b111111;
331b48d0abSRichard Henderson     bool never_same = true;
341b48d0abSRichard Henderson     uint64_t mask_residual = 1;
351b48d0abSRichard Henderson     mask_residual <<= count_residual;
361b48d0abSRichard Henderson     mask_residual -= 1;
371b48d0abSRichard Henderson     __m512i r = _mm512_set1_epi32(0);
381b48d0abSRichard Henderson 
391b48d0abSRichard Henderson     while (count512s) {
401b48d0abSRichard Henderson         int bytes_to_check = 64;
411b48d0abSRichard Henderson         uint64_t mask = 0xffffffffffffffff;
421b48d0abSRichard Henderson         if (count512s == 1) {
431b48d0abSRichard Henderson             bytes_to_check = count_residual;
441b48d0abSRichard Henderson             mask = mask_residual;
451b48d0abSRichard Henderson         }
461b48d0abSRichard Henderson         __m512i old_data = _mm512_mask_loadu_epi8(r,
471b48d0abSRichard Henderson                                                   mask, old_buf + i);
481b48d0abSRichard Henderson         __m512i new_data = _mm512_mask_loadu_epi8(r,
491b48d0abSRichard Henderson                                                   mask, new_buf + i);
501b48d0abSRichard Henderson         uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
511b48d0abSRichard Henderson         count512s--;
521b48d0abSRichard Henderson 
531b48d0abSRichard Henderson         bool is_same = (comp & 0x1);
541b48d0abSRichard Henderson         while (bytes_to_check) {
551b48d0abSRichard Henderson             if (d + 2 > dlen) {
561b48d0abSRichard Henderson                 return -1;
571b48d0abSRichard Henderson             }
581b48d0abSRichard Henderson             if (is_same) {
591b48d0abSRichard Henderson                 if (nzrun_len) {
601b48d0abSRichard Henderson                     d += uleb128_encode_small(dst + d, nzrun_len);
611b48d0abSRichard Henderson                     if (d + nzrun_len > dlen) {
621b48d0abSRichard Henderson                         return -1;
631b48d0abSRichard Henderson                     }
641b48d0abSRichard Henderson                     nzrun_start = new_buf + i - nzrun_len;
651b48d0abSRichard Henderson                     memcpy(dst + d, nzrun_start, nzrun_len);
661b48d0abSRichard Henderson                     d += nzrun_len;
671b48d0abSRichard Henderson                     nzrun_len = 0;
681b48d0abSRichard Henderson                 }
691b48d0abSRichard Henderson                 /* 64 data at a time for speed */
701b48d0abSRichard Henderson                 if (count512s && (comp == 0xffffffffffffffff)) {
711b48d0abSRichard Henderson                     i += 64;
721b48d0abSRichard Henderson                     zrun_len += 64;
731b48d0abSRichard Henderson                     break;
741b48d0abSRichard Henderson                 }
751b48d0abSRichard Henderson                 never_same = false;
761b48d0abSRichard Henderson                 num = ctz64(~comp);
771b48d0abSRichard Henderson                 num = (num < bytes_to_check) ? num : bytes_to_check;
781b48d0abSRichard Henderson                 zrun_len += num;
791b48d0abSRichard Henderson                 bytes_to_check -= num;
801b48d0abSRichard Henderson                 comp >>= num;
811b48d0abSRichard Henderson                 i += num;
821b48d0abSRichard Henderson                 if (bytes_to_check) {
831b48d0abSRichard Henderson                     /* still has different data after same data */
841b48d0abSRichard Henderson                     d += uleb128_encode_small(dst + d, zrun_len);
851b48d0abSRichard Henderson                     zrun_len = 0;
861b48d0abSRichard Henderson                 } else {
871b48d0abSRichard Henderson                     break;
881b48d0abSRichard Henderson                 }
891b48d0abSRichard Henderson             }
901b48d0abSRichard Henderson             if (never_same || zrun_len) {
911b48d0abSRichard Henderson                 /*
921b48d0abSRichard Henderson                  * never_same only acts if
931b48d0abSRichard Henderson                  * data begins with diff in first count512s
941b48d0abSRichard Henderson                  */
951b48d0abSRichard Henderson                 d += uleb128_encode_small(dst + d, zrun_len);
961b48d0abSRichard Henderson                 zrun_len = 0;
971b48d0abSRichard Henderson                 never_same = false;
981b48d0abSRichard Henderson             }
991b48d0abSRichard Henderson             /* has diff, 64 data at a time for speed */
1001b48d0abSRichard Henderson             if ((bytes_to_check == 64) && (comp == 0x0)) {
1011b48d0abSRichard Henderson                 i += 64;
1021b48d0abSRichard Henderson                 nzrun_len += 64;
1031b48d0abSRichard Henderson                 break;
1041b48d0abSRichard Henderson             }
1051b48d0abSRichard Henderson             num = ctz64(comp);
1061b48d0abSRichard Henderson             num = (num < bytes_to_check) ? num : bytes_to_check;
1071b48d0abSRichard Henderson             nzrun_len += num;
1081b48d0abSRichard Henderson             bytes_to_check -= num;
1091b48d0abSRichard Henderson             comp >>= num;
1101b48d0abSRichard Henderson             i += num;
1111b48d0abSRichard Henderson             if (bytes_to_check) {
1121b48d0abSRichard Henderson                 /* mask like 111000 */
1131b48d0abSRichard Henderson                 d += uleb128_encode_small(dst + d, nzrun_len);
1141b48d0abSRichard Henderson                 /* overflow */
1151b48d0abSRichard Henderson                 if (d + nzrun_len > dlen) {
1161b48d0abSRichard Henderson                     return -1;
1171b48d0abSRichard Henderson                 }
1181b48d0abSRichard Henderson                 nzrun_start = new_buf + i - nzrun_len;
1191b48d0abSRichard Henderson                 memcpy(dst + d, nzrun_start, nzrun_len);
1201b48d0abSRichard Henderson                 d += nzrun_len;
1211b48d0abSRichard Henderson                 nzrun_len = 0;
1221b48d0abSRichard Henderson                 is_same = true;
1231b48d0abSRichard Henderson             }
1241b48d0abSRichard Henderson         }
1251b48d0abSRichard Henderson     }
1261b48d0abSRichard Henderson 
1271b48d0abSRichard Henderson     if (nzrun_len != 0) {
1281b48d0abSRichard Henderson         d += uleb128_encode_small(dst + d, nzrun_len);
1291b48d0abSRichard Henderson         /* overflow */
1301b48d0abSRichard Henderson         if (d + nzrun_len > dlen) {
1311b48d0abSRichard Henderson             return -1;
1321b48d0abSRichard Henderson         }
1331b48d0abSRichard Henderson         nzrun_start = new_buf + i - nzrun_len;
1341b48d0abSRichard Henderson         memcpy(dst + d, nzrun_start, nzrun_len);
1351b48d0abSRichard Henderson         d += nzrun_len;
1361b48d0abSRichard Henderson     }
1371b48d0abSRichard Henderson     return d;
1381b48d0abSRichard Henderson }
139*7ba7db9fSRichard Henderson 
140*7ba7db9fSRichard Henderson static int xbzrle_encode_buffer_int(uint8_t *old_buf, uint8_t *new_buf,
141*7ba7db9fSRichard Henderson                                     int slen, uint8_t *dst, int dlen);
142*7ba7db9fSRichard Henderson 
143*7ba7db9fSRichard Henderson static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int);
144*7ba7db9fSRichard Henderson 
init_accel(void)145*7ba7db9fSRichard Henderson static void __attribute__((constructor)) init_accel(void)
146*7ba7db9fSRichard Henderson {
147*7ba7db9fSRichard Henderson     unsigned info = cpuinfo_init();
148*7ba7db9fSRichard Henderson     if (info & CPUINFO_AVX512BW) {
149*7ba7db9fSRichard Henderson         accel_func = xbzrle_encode_buffer_avx512;
150*7ba7db9fSRichard Henderson     } else {
151*7ba7db9fSRichard Henderson         accel_func = xbzrle_encode_buffer_int;
152*7ba7db9fSRichard Henderson     }
153*7ba7db9fSRichard Henderson }
154*7ba7db9fSRichard Henderson 
xbzrle_encode_buffer(uint8_t * old_buf,uint8_t * new_buf,int slen,uint8_t * dst,int dlen)155*7ba7db9fSRichard Henderson int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
156*7ba7db9fSRichard Henderson                          uint8_t *dst, int dlen)
157*7ba7db9fSRichard Henderson {
158*7ba7db9fSRichard Henderson     return accel_func(old_buf, new_buf, slen, dst, dlen);
159*7ba7db9fSRichard Henderson }
160*7ba7db9fSRichard Henderson 
161*7ba7db9fSRichard Henderson #define xbzrle_encode_buffer xbzrle_encode_buffer_int
1621b48d0abSRichard Henderson #endif
1631b48d0abSRichard Henderson 
16460fe637bSDr. David Alan Gilbert /*
16560fe637bSDr. David Alan Gilbert   page = zrun nzrun
16660fe637bSDr. David Alan Gilbert        | zrun nzrun page
16760fe637bSDr. David Alan Gilbert 
16860fe637bSDr. David Alan Gilbert   zrun = length
16960fe637bSDr. David Alan Gilbert 
17060fe637bSDr. David Alan Gilbert   nzrun = length byte...
17160fe637bSDr. David Alan Gilbert 
17260fe637bSDr. David Alan Gilbert   length = uleb128 encoded integer
17360fe637bSDr. David Alan Gilbert  */
xbzrle_encode_buffer(uint8_t * old_buf,uint8_t * new_buf,int slen,uint8_t * dst,int dlen)17460fe637bSDr. David Alan Gilbert int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
17560fe637bSDr. David Alan Gilbert                          uint8_t *dst, int dlen)
17660fe637bSDr. David Alan Gilbert {
17760fe637bSDr. David Alan Gilbert     uint32_t zrun_len = 0, nzrun_len = 0;
17860fe637bSDr. David Alan Gilbert     int d = 0, i = 0;
17960fe637bSDr. David Alan Gilbert     long res;
18060fe637bSDr. David Alan Gilbert     uint8_t *nzrun_start = NULL;
18160fe637bSDr. David Alan Gilbert 
18260fe637bSDr. David Alan Gilbert     g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
18360fe637bSDr. David Alan Gilbert                sizeof(long)));
18460fe637bSDr. David Alan Gilbert 
18560fe637bSDr. David Alan Gilbert     while (i < slen) {
18660fe637bSDr. David Alan Gilbert         /* overflow */
18760fe637bSDr. David Alan Gilbert         if (d + 2 > dlen) {
18860fe637bSDr. David Alan Gilbert             return -1;
18960fe637bSDr. David Alan Gilbert         }
19060fe637bSDr. David Alan Gilbert 
19160fe637bSDr. David Alan Gilbert         /* not aligned to sizeof(long) */
19260fe637bSDr. David Alan Gilbert         res = (slen - i) % sizeof(long);
19360fe637bSDr. David Alan Gilbert         while (res && old_buf[i] == new_buf[i]) {
19460fe637bSDr. David Alan Gilbert             zrun_len++;
19560fe637bSDr. David Alan Gilbert             i++;
19660fe637bSDr. David Alan Gilbert             res--;
19760fe637bSDr. David Alan Gilbert         }
19860fe637bSDr. David Alan Gilbert 
19960fe637bSDr. David Alan Gilbert         /* word at a time for speed */
20060fe637bSDr. David Alan Gilbert         if (!res) {
20160fe637bSDr. David Alan Gilbert             while (i < slen &&
20260fe637bSDr. David Alan Gilbert                    (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
20360fe637bSDr. David Alan Gilbert                 i += sizeof(long);
20460fe637bSDr. David Alan Gilbert                 zrun_len += sizeof(long);
20560fe637bSDr. David Alan Gilbert             }
20660fe637bSDr. David Alan Gilbert 
20760fe637bSDr. David Alan Gilbert             /* go over the rest */
20860fe637bSDr. David Alan Gilbert             while (i < slen && old_buf[i] == new_buf[i]) {
20960fe637bSDr. David Alan Gilbert                 zrun_len++;
21060fe637bSDr. David Alan Gilbert                 i++;
21160fe637bSDr. David Alan Gilbert             }
21260fe637bSDr. David Alan Gilbert         }
21360fe637bSDr. David Alan Gilbert 
21460fe637bSDr. David Alan Gilbert         /* buffer unchanged */
21560fe637bSDr. David Alan Gilbert         if (zrun_len == slen) {
21660fe637bSDr. David Alan Gilbert             return 0;
21760fe637bSDr. David Alan Gilbert         }
21860fe637bSDr. David Alan Gilbert 
21960fe637bSDr. David Alan Gilbert         /* skip last zero run */
22060fe637bSDr. David Alan Gilbert         if (i == slen) {
22160fe637bSDr. David Alan Gilbert             return d;
22260fe637bSDr. David Alan Gilbert         }
22360fe637bSDr. David Alan Gilbert 
22460fe637bSDr. David Alan Gilbert         d += uleb128_encode_small(dst + d, zrun_len);
22560fe637bSDr. David Alan Gilbert 
22660fe637bSDr. David Alan Gilbert         zrun_len = 0;
22760fe637bSDr. David Alan Gilbert         nzrun_start = new_buf + i;
22860fe637bSDr. David Alan Gilbert 
22960fe637bSDr. David Alan Gilbert         /* overflow */
23060fe637bSDr. David Alan Gilbert         if (d + 2 > dlen) {
23160fe637bSDr. David Alan Gilbert             return -1;
23260fe637bSDr. David Alan Gilbert         }
23360fe637bSDr. David Alan Gilbert         /* not aligned to sizeof(long) */
23460fe637bSDr. David Alan Gilbert         res = (slen - i) % sizeof(long);
23560fe637bSDr. David Alan Gilbert         while (res && old_buf[i] != new_buf[i]) {
23660fe637bSDr. David Alan Gilbert             i++;
23760fe637bSDr. David Alan Gilbert             nzrun_len++;
23860fe637bSDr. David Alan Gilbert             res--;
23960fe637bSDr. David Alan Gilbert         }
24060fe637bSDr. David Alan Gilbert 
24160fe637bSDr. David Alan Gilbert         /* word at a time for speed, use of 32-bit long okay */
24260fe637bSDr. David Alan Gilbert         if (!res) {
24360fe637bSDr. David Alan Gilbert             /* truncation to 32-bit long okay */
24460fe637bSDr. David Alan Gilbert             unsigned long mask = (unsigned long)0x0101010101010101ULL;
24560fe637bSDr. David Alan Gilbert             while (i < slen) {
24660fe637bSDr. David Alan Gilbert                 unsigned long xor;
24760fe637bSDr. David Alan Gilbert                 xor = *(unsigned long *)(old_buf + i)
24860fe637bSDr. David Alan Gilbert                     ^ *(unsigned long *)(new_buf + i);
24960fe637bSDr. David Alan Gilbert                 if ((xor - mask) & ~xor & (mask << 7)) {
25060fe637bSDr. David Alan Gilbert                     /* found the end of an nzrun within the current long */
25160fe637bSDr. David Alan Gilbert                     while (old_buf[i] != new_buf[i]) {
25260fe637bSDr. David Alan Gilbert                         nzrun_len++;
25360fe637bSDr. David Alan Gilbert                         i++;
25460fe637bSDr. David Alan Gilbert                     }
25560fe637bSDr. David Alan Gilbert                     break;
25660fe637bSDr. David Alan Gilbert                 } else {
25760fe637bSDr. David Alan Gilbert                     i += sizeof(long);
25860fe637bSDr. David Alan Gilbert                     nzrun_len += sizeof(long);
25960fe637bSDr. David Alan Gilbert                 }
26060fe637bSDr. David Alan Gilbert             }
26160fe637bSDr. David Alan Gilbert         }
26260fe637bSDr. David Alan Gilbert 
26360fe637bSDr. David Alan Gilbert         d += uleb128_encode_small(dst + d, nzrun_len);
26460fe637bSDr. David Alan Gilbert         /* overflow */
26560fe637bSDr. David Alan Gilbert         if (d + nzrun_len > dlen) {
26660fe637bSDr. David Alan Gilbert             return -1;
26760fe637bSDr. David Alan Gilbert         }
26860fe637bSDr. David Alan Gilbert         memcpy(dst + d, nzrun_start, nzrun_len);
26960fe637bSDr. David Alan Gilbert         d += nzrun_len;
27060fe637bSDr. David Alan Gilbert         nzrun_len = 0;
27160fe637bSDr. David Alan Gilbert     }
27260fe637bSDr. David Alan Gilbert 
27360fe637bSDr. David Alan Gilbert     return d;
27460fe637bSDr. David Alan Gilbert }
27560fe637bSDr. David Alan Gilbert 
xbzrle_decode_buffer(uint8_t * src,int slen,uint8_t * dst,int dlen)27660fe637bSDr. David Alan Gilbert int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
27760fe637bSDr. David Alan Gilbert {
27860fe637bSDr. David Alan Gilbert     int i = 0, d = 0;
27960fe637bSDr. David Alan Gilbert     int ret;
28060fe637bSDr. David Alan Gilbert     uint32_t count = 0;
28160fe637bSDr. David Alan Gilbert 
28260fe637bSDr. David Alan Gilbert     while (i < slen) {
28360fe637bSDr. David Alan Gilbert 
28460fe637bSDr. David Alan Gilbert         /* zrun */
28560fe637bSDr. David Alan Gilbert         if ((slen - i) < 2) {
28660fe637bSDr. David Alan Gilbert             return -1;
28760fe637bSDr. David Alan Gilbert         }
28860fe637bSDr. David Alan Gilbert 
28960fe637bSDr. David Alan Gilbert         ret = uleb128_decode_small(src + i, &count);
29060fe637bSDr. David Alan Gilbert         if (ret < 0 || (i && !count)) {
29160fe637bSDr. David Alan Gilbert             return -1;
29260fe637bSDr. David Alan Gilbert         }
29360fe637bSDr. David Alan Gilbert         i += ret;
29460fe637bSDr. David Alan Gilbert         d += count;
29560fe637bSDr. David Alan Gilbert 
29660fe637bSDr. David Alan Gilbert         /* overflow */
29760fe637bSDr. David Alan Gilbert         if (d > dlen) {
29860fe637bSDr. David Alan Gilbert             return -1;
29960fe637bSDr. David Alan Gilbert         }
30060fe637bSDr. David Alan Gilbert 
30160fe637bSDr. David Alan Gilbert         /* nzrun */
30260fe637bSDr. David Alan Gilbert         if ((slen - i) < 2) {
30360fe637bSDr. David Alan Gilbert             return -1;
30460fe637bSDr. David Alan Gilbert         }
30560fe637bSDr. David Alan Gilbert 
30660fe637bSDr. David Alan Gilbert         ret = uleb128_decode_small(src + i, &count);
30760fe637bSDr. David Alan Gilbert         if (ret < 0 || !count) {
30860fe637bSDr. David Alan Gilbert             return -1;
30960fe637bSDr. David Alan Gilbert         }
31060fe637bSDr. David Alan Gilbert         i += ret;
31160fe637bSDr. David Alan Gilbert 
31260fe637bSDr. David Alan Gilbert         /* overflow */
31360fe637bSDr. David Alan Gilbert         if (d + count > dlen || i + count > slen) {
31460fe637bSDr. David Alan Gilbert             return -1;
31560fe637bSDr. David Alan Gilbert         }
31660fe637bSDr. David Alan Gilbert 
31760fe637bSDr. David Alan Gilbert         memcpy(dst + d, src + i, count);
31860fe637bSDr. David Alan Gilbert         d += count;
31960fe637bSDr. David Alan Gilbert         i += count;
32060fe637bSDr. David Alan Gilbert     }
32160fe637bSDr. David Alan Gilbert 
32260fe637bSDr. David Alan Gilbert     return d;
32360fe637bSDr. David Alan Gilbert }
324