xref: /openbmc/qemu/migration/xbzrle.c (revision 919a0423)
1 /*
2  * Xor Based Zero Run Length Encoding
3  *
4  * Copyright 2013 Red Hat, Inc. and/or its affiliates
5  *
6  * Authors:
7  *  Orit Wasserman  <owasserm@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  *
12  */
13 #include "qemu/osdep.h"
14 #include "qemu/cutils.h"
15 #include "xbzrle.h"
16 
17 /*
18   page = zrun nzrun
19        | zrun nzrun page
20 
21   zrun = length
22 
23   nzrun = length byte...
24 
25   length = uleb128 encoded integer
26  */
27 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
28                          uint8_t *dst, int dlen)
29 {
30     uint32_t zrun_len = 0, nzrun_len = 0;
31     int d = 0, i = 0;
32     long res;
33     uint8_t *nzrun_start = NULL;
34 
35     g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
36                sizeof(long)));
37 
38     while (i < slen) {
39         /* overflow */
40         if (d + 2 > dlen) {
41             return -1;
42         }
43 
44         /* not aligned to sizeof(long) */
45         res = (slen - i) % sizeof(long);
46         while (res && old_buf[i] == new_buf[i]) {
47             zrun_len++;
48             i++;
49             res--;
50         }
51 
52         /* word at a time for speed */
53         if (!res) {
54             while (i < slen &&
55                    (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
56                 i += sizeof(long);
57                 zrun_len += sizeof(long);
58             }
59 
60             /* go over the rest */
61             while (i < slen && old_buf[i] == new_buf[i]) {
62                 zrun_len++;
63                 i++;
64             }
65         }
66 
67         /* buffer unchanged */
68         if (zrun_len == slen) {
69             return 0;
70         }
71 
72         /* skip last zero run */
73         if (i == slen) {
74             return d;
75         }
76 
77         d += uleb128_encode_small(dst + d, zrun_len);
78 
79         zrun_len = 0;
80         nzrun_start = new_buf + i;
81 
82         /* overflow */
83         if (d + 2 > dlen) {
84             return -1;
85         }
86         /* not aligned to sizeof(long) */
87         res = (slen - i) % sizeof(long);
88         while (res && old_buf[i] != new_buf[i]) {
89             i++;
90             nzrun_len++;
91             res--;
92         }
93 
94         /* word at a time for speed, use of 32-bit long okay */
95         if (!res) {
96             /* truncation to 32-bit long okay */
97             unsigned long mask = (unsigned long)0x0101010101010101ULL;
98             while (i < slen) {
99                 unsigned long xor;
100                 xor = *(unsigned long *)(old_buf + i)
101                     ^ *(unsigned long *)(new_buf + i);
102                 if ((xor - mask) & ~xor & (mask << 7)) {
103                     /* found the end of an nzrun within the current long */
104                     while (old_buf[i] != new_buf[i]) {
105                         nzrun_len++;
106                         i++;
107                     }
108                     break;
109                 } else {
110                     i += sizeof(long);
111                     nzrun_len += sizeof(long);
112                 }
113             }
114         }
115 
116         d += uleb128_encode_small(dst + d, nzrun_len);
117         /* overflow */
118         if (d + nzrun_len > dlen) {
119             return -1;
120         }
121         memcpy(dst + d, nzrun_start, nzrun_len);
122         d += nzrun_len;
123         nzrun_len = 0;
124     }
125 
126     return d;
127 }
128 
129 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
130 {
131     int i = 0, d = 0;
132     int ret;
133     uint32_t count = 0;
134 
135     while (i < slen) {
136 
137         /* zrun */
138         if ((slen - i) < 2) {
139             return -1;
140         }
141 
142         ret = uleb128_decode_small(src + i, &count);
143         if (ret < 0 || (i && !count)) {
144             return -1;
145         }
146         i += ret;
147         d += count;
148 
149         /* overflow */
150         if (d > dlen) {
151             return -1;
152         }
153 
154         /* nzrun */
155         if ((slen - i) < 2) {
156             return -1;
157         }
158 
159         ret = uleb128_decode_small(src + i, &count);
160         if (ret < 0 || !count) {
161             return -1;
162         }
163         i += ret;
164 
165         /* overflow */
166         if (d + count > dlen || i + count > slen) {
167             return -1;
168         }
169 
170         memcpy(dst + d, src + i, count);
171         d += count;
172         i += count;
173     }
174 
175     return d;
176 }
177 
178 #if defined(CONFIG_AVX512BW_OPT)
179 #pragma GCC push_options
180 #pragma GCC target("avx512bw")
181 #include <immintrin.h>
182 int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
183                              uint8_t *dst, int dlen)
184 {
185     uint32_t zrun_len = 0, nzrun_len = 0;
186     int d = 0, i = 0, num = 0;
187     uint8_t *nzrun_start = NULL;
188     /* add 1 to include residual part in main loop */
189     uint32_t count512s = (slen >> 6) + 1;
190     /* countResidual is tail of data, i.e., countResidual = slen % 64 */
191     uint32_t count_residual = slen & 0b111111;
192     bool never_same = true;
193     uint64_t mask_residual = 1;
194     mask_residual <<= count_residual;
195     mask_residual -= 1;
196     __m512i r = _mm512_set1_epi32(0);
197 
198     while (count512s) {
199         if (d + 2 > dlen) {
200             return -1;
201         }
202 
203         int bytes_to_check = 64;
204         uint64_t mask = 0xffffffffffffffff;
205         if (count512s == 1) {
206             bytes_to_check = count_residual;
207             mask = mask_residual;
208         }
209         __m512i old_data = _mm512_mask_loadu_epi8(r,
210                                                   mask, old_buf + i);
211         __m512i new_data = _mm512_mask_loadu_epi8(r,
212                                                   mask, new_buf + i);
213         uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
214         count512s--;
215 
216         bool is_same = (comp & 0x1);
217         while (bytes_to_check) {
218             if (is_same) {
219                 if (nzrun_len) {
220                     d += uleb128_encode_small(dst + d, nzrun_len);
221                     if (d + nzrun_len > dlen) {
222                         return -1;
223                     }
224                     nzrun_start = new_buf + i - nzrun_len;
225                     memcpy(dst + d, nzrun_start, nzrun_len);
226                     d += nzrun_len;
227                     nzrun_len = 0;
228                 }
229                 /* 64 data at a time for speed */
230                 if (count512s && (comp == 0xffffffffffffffff)) {
231                     i += 64;
232                     zrun_len += 64;
233                     break;
234                 }
235                 never_same = false;
236                 num = __builtin_ctzll(~comp);
237                 num = (num < bytes_to_check) ? num : bytes_to_check;
238                 zrun_len += num;
239                 bytes_to_check -= num;
240                 comp >>= num;
241                 i += num;
242                 if (bytes_to_check) {
243                     /* still has different data after same data */
244                     d += uleb128_encode_small(dst + d, zrun_len);
245                     zrun_len = 0;
246                 } else {
247                     break;
248                 }
249             }
250             if (never_same || zrun_len) {
251                 /*
252                  * never_same only acts if
253                  * data begins with diff in first count512s
254                  */
255                 d += uleb128_encode_small(dst + d, zrun_len);
256                 zrun_len = 0;
257                 never_same = false;
258             }
259             /* has diff, 64 data at a time for speed */
260             if ((bytes_to_check == 64) && (comp == 0x0)) {
261                 i += 64;
262                 nzrun_len += 64;
263                 break;
264             }
265             num = __builtin_ctzll(comp);
266             num = (num < bytes_to_check) ? num : bytes_to_check;
267             nzrun_len += num;
268             bytes_to_check -= num;
269             comp >>= num;
270             i += num;
271             if (bytes_to_check) {
272                 /* mask like 111000 */
273                 d += uleb128_encode_small(dst + d, nzrun_len);
274                 /* overflow */
275                 if (d + nzrun_len > dlen) {
276                     return -1;
277                 }
278                 nzrun_start = new_buf + i - nzrun_len;
279                 memcpy(dst + d, nzrun_start, nzrun_len);
280                 d += nzrun_len;
281                 nzrun_len = 0;
282                 is_same = true;
283             }
284         }
285     }
286 
287     if (nzrun_len != 0) {
288         d += uleb128_encode_small(dst + d, nzrun_len);
289         /* overflow */
290         if (d + nzrun_len > dlen) {
291             return -1;
292         }
293         nzrun_start = new_buf + i - nzrun_len;
294         memcpy(dst + d, nzrun_start, nzrun_len);
295         d += nzrun_len;
296     }
297     return d;
298 }
299 #pragma GCC pop_options
300 #endif
301