1 /* 2 * Xor Based Zero Run Length Encoding 3 * 4 * Copyright 2013 Red Hat, Inc. and/or its affiliates 5 * 6 * Authors: 7 * Orit Wasserman <owasserm@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 * 12 */ 13 #include "qemu/osdep.h" 14 #include "qemu/cutils.h" 15 #include "qemu/host-utils.h" 16 #include "xbzrle.h" 17 18 /* 19 page = zrun nzrun 20 | zrun nzrun page 21 22 zrun = length 23 24 nzrun = length byte... 25 26 length = uleb128 encoded integer 27 */ 28 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, 29 uint8_t *dst, int dlen) 30 { 31 uint32_t zrun_len = 0, nzrun_len = 0; 32 int d = 0, i = 0; 33 long res; 34 uint8_t *nzrun_start = NULL; 35 36 g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) % 37 sizeof(long))); 38 39 while (i < slen) { 40 /* overflow */ 41 if (d + 2 > dlen) { 42 return -1; 43 } 44 45 /* not aligned to sizeof(long) */ 46 res = (slen - i) % sizeof(long); 47 while (res && old_buf[i] == new_buf[i]) { 48 zrun_len++; 49 i++; 50 res--; 51 } 52 53 /* word at a time for speed */ 54 if (!res) { 55 while (i < slen && 56 (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) { 57 i += sizeof(long); 58 zrun_len += sizeof(long); 59 } 60 61 /* go over the rest */ 62 while (i < slen && old_buf[i] == new_buf[i]) { 63 zrun_len++; 64 i++; 65 } 66 } 67 68 /* buffer unchanged */ 69 if (zrun_len == slen) { 70 return 0; 71 } 72 73 /* skip last zero run */ 74 if (i == slen) { 75 return d; 76 } 77 78 d += uleb128_encode_small(dst + d, zrun_len); 79 80 zrun_len = 0; 81 nzrun_start = new_buf + i; 82 83 /* overflow */ 84 if (d + 2 > dlen) { 85 return -1; 86 } 87 /* not aligned to sizeof(long) */ 88 res = (slen - i) % sizeof(long); 89 while (res && old_buf[i] != new_buf[i]) { 90 i++; 91 nzrun_len++; 92 res--; 93 } 94 95 /* word at a time for speed, use of 32-bit long okay */ 96 if (!res) { 97 /* truncation to 32-bit long okay */ 98 unsigned long mask = (unsigned long)0x0101010101010101ULL; 99 while (i < slen) { 100 unsigned long xor; 101 xor = *(unsigned long *)(old_buf + i) 102 ^ *(unsigned long *)(new_buf + i); 103 if ((xor - mask) & ~xor & (mask << 7)) { 104 /* found the end of an nzrun within the current long */ 105 while (old_buf[i] != new_buf[i]) { 106 nzrun_len++; 107 i++; 108 } 109 break; 110 } else { 111 i += sizeof(long); 112 nzrun_len += sizeof(long); 113 } 114 } 115 } 116 117 d += uleb128_encode_small(dst + d, nzrun_len); 118 /* overflow */ 119 if (d + nzrun_len > dlen) { 120 return -1; 121 } 122 memcpy(dst + d, nzrun_start, nzrun_len); 123 d += nzrun_len; 124 nzrun_len = 0; 125 } 126 127 return d; 128 } 129 130 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) 131 { 132 int i = 0, d = 0; 133 int ret; 134 uint32_t count = 0; 135 136 while (i < slen) { 137 138 /* zrun */ 139 if ((slen - i) < 2) { 140 return -1; 141 } 142 143 ret = uleb128_decode_small(src + i, &count); 144 if (ret < 0 || (i && !count)) { 145 return -1; 146 } 147 i += ret; 148 d += count; 149 150 /* overflow */ 151 if (d > dlen) { 152 return -1; 153 } 154 155 /* nzrun */ 156 if ((slen - i) < 2) { 157 return -1; 158 } 159 160 ret = uleb128_decode_small(src + i, &count); 161 if (ret < 0 || !count) { 162 return -1; 163 } 164 i += ret; 165 166 /* overflow */ 167 if (d + count > dlen || i + count > slen) { 168 return -1; 169 } 170 171 memcpy(dst + d, src + i, count); 172 d += count; 173 i += count; 174 } 175 176 return d; 177 } 178 179 #if defined(CONFIG_AVX512BW_OPT) 180 #include <immintrin.h> 181 182 int __attribute__((target("avx512bw"))) 183 xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, 184 uint8_t *dst, int dlen) 185 { 186 uint32_t zrun_len = 0, nzrun_len = 0; 187 int d = 0, i = 0, num = 0; 188 uint8_t *nzrun_start = NULL; 189 /* add 1 to include residual part in main loop */ 190 uint32_t count512s = (slen >> 6) + 1; 191 /* countResidual is tail of data, i.e., countResidual = slen % 64 */ 192 uint32_t count_residual = slen & 0b111111; 193 bool never_same = true; 194 uint64_t mask_residual = 1; 195 mask_residual <<= count_residual; 196 mask_residual -= 1; 197 __m512i r = _mm512_set1_epi32(0); 198 199 while (count512s) { 200 int bytes_to_check = 64; 201 uint64_t mask = 0xffffffffffffffff; 202 if (count512s == 1) { 203 bytes_to_check = count_residual; 204 mask = mask_residual; 205 } 206 __m512i old_data = _mm512_mask_loadu_epi8(r, 207 mask, old_buf + i); 208 __m512i new_data = _mm512_mask_loadu_epi8(r, 209 mask, new_buf + i); 210 uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data); 211 count512s--; 212 213 bool is_same = (comp & 0x1); 214 while (bytes_to_check) { 215 if (d + 2 > dlen) { 216 return -1; 217 } 218 if (is_same) { 219 if (nzrun_len) { 220 d += uleb128_encode_small(dst + d, nzrun_len); 221 if (d + nzrun_len > dlen) { 222 return -1; 223 } 224 nzrun_start = new_buf + i - nzrun_len; 225 memcpy(dst + d, nzrun_start, nzrun_len); 226 d += nzrun_len; 227 nzrun_len = 0; 228 } 229 /* 64 data at a time for speed */ 230 if (count512s && (comp == 0xffffffffffffffff)) { 231 i += 64; 232 zrun_len += 64; 233 break; 234 } 235 never_same = false; 236 num = ctz64(~comp); 237 num = (num < bytes_to_check) ? num : bytes_to_check; 238 zrun_len += num; 239 bytes_to_check -= num; 240 comp >>= num; 241 i += num; 242 if (bytes_to_check) { 243 /* still has different data after same data */ 244 d += uleb128_encode_small(dst + d, zrun_len); 245 zrun_len = 0; 246 } else { 247 break; 248 } 249 } 250 if (never_same || zrun_len) { 251 /* 252 * never_same only acts if 253 * data begins with diff in first count512s 254 */ 255 d += uleb128_encode_small(dst + d, zrun_len); 256 zrun_len = 0; 257 never_same = false; 258 } 259 /* has diff, 64 data at a time for speed */ 260 if ((bytes_to_check == 64) && (comp == 0x0)) { 261 i += 64; 262 nzrun_len += 64; 263 break; 264 } 265 num = ctz64(comp); 266 num = (num < bytes_to_check) ? num : bytes_to_check; 267 nzrun_len += num; 268 bytes_to_check -= num; 269 comp >>= num; 270 i += num; 271 if (bytes_to_check) { 272 /* mask like 111000 */ 273 d += uleb128_encode_small(dst + d, nzrun_len); 274 /* overflow */ 275 if (d + nzrun_len > dlen) { 276 return -1; 277 } 278 nzrun_start = new_buf + i - nzrun_len; 279 memcpy(dst + d, nzrun_start, nzrun_len); 280 d += nzrun_len; 281 nzrun_len = 0; 282 is_same = true; 283 } 284 } 285 } 286 287 if (nzrun_len != 0) { 288 d += uleb128_encode_small(dst + d, nzrun_len); 289 /* overflow */ 290 if (d + nzrun_len > dlen) { 291 return -1; 292 } 293 nzrun_start = new_buf + i - nzrun_len; 294 memcpy(dst + d, nzrun_start, nzrun_len); 295 d += nzrun_len; 296 } 297 return d; 298 } 299 #endif 300