xref: /openbmc/qemu/migration/xbzrle.c (revision d044b7c3)
1 /*
2  * Xor Based Zero Run Length Encoding
3  *
4  * Copyright 2013 Red Hat, Inc. and/or its affiliates
5  *
6  * Authors:
7  *  Orit Wasserman  <owasserm@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  *
12  */
13 #include "qemu/osdep.h"
14 #include "qemu/cutils.h"
15 #include "qemu/host-utils.h"
16 #include "xbzrle.h"
17 
18 /*
19   page = zrun nzrun
20        | zrun nzrun page
21 
22   zrun = length
23 
24   nzrun = length byte...
25 
26   length = uleb128 encoded integer
27  */
28 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
29                          uint8_t *dst, int dlen)
30 {
31     uint32_t zrun_len = 0, nzrun_len = 0;
32     int d = 0, i = 0;
33     long res;
34     uint8_t *nzrun_start = NULL;
35 
36     g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
37                sizeof(long)));
38 
39     while (i < slen) {
40         /* overflow */
41         if (d + 2 > dlen) {
42             return -1;
43         }
44 
45         /* not aligned to sizeof(long) */
46         res = (slen - i) % sizeof(long);
47         while (res && old_buf[i] == new_buf[i]) {
48             zrun_len++;
49             i++;
50             res--;
51         }
52 
53         /* word at a time for speed */
54         if (!res) {
55             while (i < slen &&
56                    (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
57                 i += sizeof(long);
58                 zrun_len += sizeof(long);
59             }
60 
61             /* go over the rest */
62             while (i < slen && old_buf[i] == new_buf[i]) {
63                 zrun_len++;
64                 i++;
65             }
66         }
67 
68         /* buffer unchanged */
69         if (zrun_len == slen) {
70             return 0;
71         }
72 
73         /* skip last zero run */
74         if (i == slen) {
75             return d;
76         }
77 
78         d += uleb128_encode_small(dst + d, zrun_len);
79 
80         zrun_len = 0;
81         nzrun_start = new_buf + i;
82 
83         /* overflow */
84         if (d + 2 > dlen) {
85             return -1;
86         }
87         /* not aligned to sizeof(long) */
88         res = (slen - i) % sizeof(long);
89         while (res && old_buf[i] != new_buf[i]) {
90             i++;
91             nzrun_len++;
92             res--;
93         }
94 
95         /* word at a time for speed, use of 32-bit long okay */
96         if (!res) {
97             /* truncation to 32-bit long okay */
98             unsigned long mask = (unsigned long)0x0101010101010101ULL;
99             while (i < slen) {
100                 unsigned long xor;
101                 xor = *(unsigned long *)(old_buf + i)
102                     ^ *(unsigned long *)(new_buf + i);
103                 if ((xor - mask) & ~xor & (mask << 7)) {
104                     /* found the end of an nzrun within the current long */
105                     while (old_buf[i] != new_buf[i]) {
106                         nzrun_len++;
107                         i++;
108                     }
109                     break;
110                 } else {
111                     i += sizeof(long);
112                     nzrun_len += sizeof(long);
113                 }
114             }
115         }
116 
117         d += uleb128_encode_small(dst + d, nzrun_len);
118         /* overflow */
119         if (d + nzrun_len > dlen) {
120             return -1;
121         }
122         memcpy(dst + d, nzrun_start, nzrun_len);
123         d += nzrun_len;
124         nzrun_len = 0;
125     }
126 
127     return d;
128 }
129 
130 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
131 {
132     int i = 0, d = 0;
133     int ret;
134     uint32_t count = 0;
135 
136     while (i < slen) {
137 
138         /* zrun */
139         if ((slen - i) < 2) {
140             return -1;
141         }
142 
143         ret = uleb128_decode_small(src + i, &count);
144         if (ret < 0 || (i && !count)) {
145             return -1;
146         }
147         i += ret;
148         d += count;
149 
150         /* overflow */
151         if (d > dlen) {
152             return -1;
153         }
154 
155         /* nzrun */
156         if ((slen - i) < 2) {
157             return -1;
158         }
159 
160         ret = uleb128_decode_small(src + i, &count);
161         if (ret < 0 || !count) {
162             return -1;
163         }
164         i += ret;
165 
166         /* overflow */
167         if (d + count > dlen || i + count > slen) {
168             return -1;
169         }
170 
171         memcpy(dst + d, src + i, count);
172         d += count;
173         i += count;
174     }
175 
176     return d;
177 }
178 
179 #if defined(CONFIG_AVX512BW_OPT)
180 #pragma GCC push_options
181 #pragma GCC target("avx512bw")
182 #include <immintrin.h>
183 int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
184                              uint8_t *dst, int dlen)
185 {
186     uint32_t zrun_len = 0, nzrun_len = 0;
187     int d = 0, i = 0, num = 0;
188     uint8_t *nzrun_start = NULL;
189     /* add 1 to include residual part in main loop */
190     uint32_t count512s = (slen >> 6) + 1;
191     /* countResidual is tail of data, i.e., countResidual = slen % 64 */
192     uint32_t count_residual = slen & 0b111111;
193     bool never_same = true;
194     uint64_t mask_residual = 1;
195     mask_residual <<= count_residual;
196     mask_residual -= 1;
197     __m512i r = _mm512_set1_epi32(0);
198 
199     while (count512s) {
200         int bytes_to_check = 64;
201         uint64_t mask = 0xffffffffffffffff;
202         if (count512s == 1) {
203             bytes_to_check = count_residual;
204             mask = mask_residual;
205         }
206         __m512i old_data = _mm512_mask_loadu_epi8(r,
207                                                   mask, old_buf + i);
208         __m512i new_data = _mm512_mask_loadu_epi8(r,
209                                                   mask, new_buf + i);
210         uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
211         count512s--;
212 
213         bool is_same = (comp & 0x1);
214         while (bytes_to_check) {
215             if (d + 2 > dlen) {
216                 return -1;
217             }
218             if (is_same) {
219                 if (nzrun_len) {
220                     d += uleb128_encode_small(dst + d, nzrun_len);
221                     if (d + nzrun_len > dlen) {
222                         return -1;
223                     }
224                     nzrun_start = new_buf + i - nzrun_len;
225                     memcpy(dst + d, nzrun_start, nzrun_len);
226                     d += nzrun_len;
227                     nzrun_len = 0;
228                 }
229                 /* 64 data at a time for speed */
230                 if (count512s && (comp == 0xffffffffffffffff)) {
231                     i += 64;
232                     zrun_len += 64;
233                     break;
234                 }
235                 never_same = false;
236                 num = ctz64(~comp);
237                 num = (num < bytes_to_check) ? num : bytes_to_check;
238                 zrun_len += num;
239                 bytes_to_check -= num;
240                 comp >>= num;
241                 i += num;
242                 if (bytes_to_check) {
243                     /* still has different data after same data */
244                     d += uleb128_encode_small(dst + d, zrun_len);
245                     zrun_len = 0;
246                 } else {
247                     break;
248                 }
249             }
250             if (never_same || zrun_len) {
251                 /*
252                  * never_same only acts if
253                  * data begins with diff in first count512s
254                  */
255                 d += uleb128_encode_small(dst + d, zrun_len);
256                 zrun_len = 0;
257                 never_same = false;
258             }
259             /* has diff, 64 data at a time for speed */
260             if ((bytes_to_check == 64) && (comp == 0x0)) {
261                 i += 64;
262                 nzrun_len += 64;
263                 break;
264             }
265             num = ctz64(comp);
266             num = (num < bytes_to_check) ? num : bytes_to_check;
267             nzrun_len += num;
268             bytes_to_check -= num;
269             comp >>= num;
270             i += num;
271             if (bytes_to_check) {
272                 /* mask like 111000 */
273                 d += uleb128_encode_small(dst + d, nzrun_len);
274                 /* overflow */
275                 if (d + nzrun_len > dlen) {
276                     return -1;
277                 }
278                 nzrun_start = new_buf + i - nzrun_len;
279                 memcpy(dst + d, nzrun_start, nzrun_len);
280                 d += nzrun_len;
281                 nzrun_len = 0;
282                 is_same = true;
283             }
284         }
285     }
286 
287     if (nzrun_len != 0) {
288         d += uleb128_encode_small(dst + d, nzrun_len);
289         /* overflow */
290         if (d + nzrun_len > dlen) {
291             return -1;
292         }
293         nzrun_start = new_buf + i - nzrun_len;
294         memcpy(dst + d, nzrun_start, nzrun_len);
295         d += nzrun_len;
296     }
297     return d;
298 }
299 #pragma GCC pop_options
300 #endif
301