1 /* 2 * Xor Based Zero Run Length Encoding 3 * 4 * Copyright 2013 Red Hat, Inc. and/or its affiliates 5 * 6 * Authors: 7 * Orit Wasserman <owasserm@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 * 12 */ 13 #include "qemu/osdep.h" 14 #include "qemu/cutils.h" 15 #include "xbzrle.h" 16 17 /* 18 page = zrun nzrun 19 | zrun nzrun page 20 21 zrun = length 22 23 nzrun = length byte... 24 25 length = uleb128 encoded integer 26 */ 27 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, 28 uint8_t *dst, int dlen) 29 { 30 uint32_t zrun_len = 0, nzrun_len = 0; 31 int d = 0, i = 0; 32 long res; 33 uint8_t *nzrun_start = NULL; 34 35 g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) % 36 sizeof(long))); 37 38 while (i < slen) { 39 /* overflow */ 40 if (d + 2 > dlen) { 41 return -1; 42 } 43 44 /* not aligned to sizeof(long) */ 45 res = (slen - i) % sizeof(long); 46 while (res && old_buf[i] == new_buf[i]) { 47 zrun_len++; 48 i++; 49 res--; 50 } 51 52 /* word at a time for speed */ 53 if (!res) { 54 while (i < slen && 55 (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) { 56 i += sizeof(long); 57 zrun_len += sizeof(long); 58 } 59 60 /* go over the rest */ 61 while (i < slen && old_buf[i] == new_buf[i]) { 62 zrun_len++; 63 i++; 64 } 65 } 66 67 /* buffer unchanged */ 68 if (zrun_len == slen) { 69 return 0; 70 } 71 72 /* skip last zero run */ 73 if (i == slen) { 74 return d; 75 } 76 77 d += uleb128_encode_small(dst + d, zrun_len); 78 79 zrun_len = 0; 80 nzrun_start = new_buf + i; 81 82 /* overflow */ 83 if (d + 2 > dlen) { 84 return -1; 85 } 86 /* not aligned to sizeof(long) */ 87 res = (slen - i) % sizeof(long); 88 while (res && old_buf[i] != new_buf[i]) { 89 i++; 90 nzrun_len++; 91 res--; 92 } 93 94 /* word at a time for speed, use of 32-bit long okay */ 95 if (!res) { 96 /* truncation to 32-bit long okay */ 97 unsigned long mask = (unsigned long)0x0101010101010101ULL; 98 while (i < slen) { 99 unsigned long xor; 100 xor = *(unsigned long *)(old_buf + i) 101 ^ *(unsigned long *)(new_buf + i); 102 if ((xor - mask) & ~xor & (mask << 7)) { 103 /* found the end of an nzrun within the current long */ 104 while (old_buf[i] != new_buf[i]) { 105 nzrun_len++; 106 i++; 107 } 108 break; 109 } else { 110 i += sizeof(long); 111 nzrun_len += sizeof(long); 112 } 113 } 114 } 115 116 d += uleb128_encode_small(dst + d, nzrun_len); 117 /* overflow */ 118 if (d + nzrun_len > dlen) { 119 return -1; 120 } 121 memcpy(dst + d, nzrun_start, nzrun_len); 122 d += nzrun_len; 123 nzrun_len = 0; 124 } 125 126 return d; 127 } 128 129 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) 130 { 131 int i = 0, d = 0; 132 int ret; 133 uint32_t count = 0; 134 135 while (i < slen) { 136 137 /* zrun */ 138 if ((slen - i) < 2) { 139 return -1; 140 } 141 142 ret = uleb128_decode_small(src + i, &count); 143 if (ret < 0 || (i && !count)) { 144 return -1; 145 } 146 i += ret; 147 d += count; 148 149 /* overflow */ 150 if (d > dlen) { 151 return -1; 152 } 153 154 /* nzrun */ 155 if ((slen - i) < 2) { 156 return -1; 157 } 158 159 ret = uleb128_decode_small(src + i, &count); 160 if (ret < 0 || !count) { 161 return -1; 162 } 163 i += ret; 164 165 /* overflow */ 166 if (d + count > dlen || i + count > slen) { 167 return -1; 168 } 169 170 memcpy(dst + d, src + i, count); 171 d += count; 172 i += count; 173 } 174 175 return d; 176 } 177 178 #if defined(CONFIG_AVX512BW_OPT) 179 #pragma GCC push_options 180 #pragma GCC target("avx512bw") 181 #include <immintrin.h> 182 int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, 183 uint8_t *dst, int dlen) 184 { 185 uint32_t zrun_len = 0, nzrun_len = 0; 186 int d = 0, i = 0, num = 0; 187 uint8_t *nzrun_start = NULL; 188 /* add 1 to include residual part in main loop */ 189 uint32_t count512s = (slen >> 6) + 1; 190 /* countResidual is tail of data, i.e., countResidual = slen % 64 */ 191 uint32_t count_residual = slen & 0b111111; 192 bool never_same = true; 193 uint64_t mask_residual = 1; 194 mask_residual <<= count_residual; 195 mask_residual -= 1; 196 __m512i r = _mm512_set1_epi32(0); 197 198 while (count512s) { 199 if (d + 2 > dlen) { 200 return -1; 201 } 202 203 int bytes_to_check = 64; 204 uint64_t mask = 0xffffffffffffffff; 205 if (count512s == 1) { 206 bytes_to_check = count_residual; 207 mask = mask_residual; 208 } 209 __m512i old_data = _mm512_mask_loadu_epi8(r, 210 mask, old_buf + i); 211 __m512i new_data = _mm512_mask_loadu_epi8(r, 212 mask, new_buf + i); 213 uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data); 214 count512s--; 215 216 bool is_same = (comp & 0x1); 217 while (bytes_to_check) { 218 if (is_same) { 219 if (nzrun_len) { 220 d += uleb128_encode_small(dst + d, nzrun_len); 221 if (d + nzrun_len > dlen) { 222 return -1; 223 } 224 nzrun_start = new_buf + i - nzrun_len; 225 memcpy(dst + d, nzrun_start, nzrun_len); 226 d += nzrun_len; 227 nzrun_len = 0; 228 } 229 /* 64 data at a time for speed */ 230 if (count512s && (comp == 0xffffffffffffffff)) { 231 i += 64; 232 zrun_len += 64; 233 break; 234 } 235 never_same = false; 236 num = __builtin_ctzll(~comp); 237 num = (num < bytes_to_check) ? num : bytes_to_check; 238 zrun_len += num; 239 bytes_to_check -= num; 240 comp >>= num; 241 i += num; 242 if (bytes_to_check) { 243 /* still has different data after same data */ 244 d += uleb128_encode_small(dst + d, zrun_len); 245 zrun_len = 0; 246 } else { 247 break; 248 } 249 } 250 if (never_same || zrun_len) { 251 /* 252 * never_same only acts if 253 * data begins with diff in first count512s 254 */ 255 d += uleb128_encode_small(dst + d, zrun_len); 256 zrun_len = 0; 257 never_same = false; 258 } 259 /* has diff, 64 data at a time for speed */ 260 if ((bytes_to_check == 64) && (comp == 0x0)) { 261 i += 64; 262 nzrun_len += 64; 263 break; 264 } 265 num = __builtin_ctzll(comp); 266 num = (num < bytes_to_check) ? num : bytes_to_check; 267 nzrun_len += num; 268 bytes_to_check -= num; 269 comp >>= num; 270 i += num; 271 if (bytes_to_check) { 272 /* mask like 111000 */ 273 d += uleb128_encode_small(dst + d, nzrun_len); 274 /* overflow */ 275 if (d + nzrun_len > dlen) { 276 return -1; 277 } 278 nzrun_start = new_buf + i - nzrun_len; 279 memcpy(dst + d, nzrun_start, nzrun_len); 280 d += nzrun_len; 281 nzrun_len = 0; 282 is_same = true; 283 } 284 } 285 } 286 287 if (nzrun_len != 0) { 288 d += uleb128_encode_small(dst + d, nzrun_len); 289 /* overflow */ 290 if (d + nzrun_len > dlen) { 291 return -1; 292 } 293 nzrun_start = new_buf + i - nzrun_len; 294 memcpy(dst + d, nzrun_start, nzrun_len); 295 d += nzrun_len; 296 } 297 return d; 298 } 299 #pragma GCC pop_options 300 #endif 301