1 /* 2 * Xor Based Zero Run Length Encoding 3 * 4 * Copyright 2013 Red Hat, Inc. and/or its affiliates 5 * 6 * Authors: 7 * Orit Wasserman <owasserm@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 * 12 */ 13 #include "qemu/osdep.h" 14 #include "qemu/cutils.h" 15 #include "qemu/host-utils.h" 16 #include "xbzrle.h" 17 18 #if defined(CONFIG_AVX512BW_OPT) 19 #include <immintrin.h> 20 #include "host/cpuinfo.h" 21 22 static int __attribute__((target("avx512bw"))) 23 xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, 24 uint8_t *dst, int dlen) 25 { 26 uint32_t zrun_len = 0, nzrun_len = 0; 27 int d = 0, i = 0, num = 0; 28 uint8_t *nzrun_start = NULL; 29 /* add 1 to include residual part in main loop */ 30 uint32_t count512s = (slen >> 6) + 1; 31 /* countResidual is tail of data, i.e., countResidual = slen % 64 */ 32 uint32_t count_residual = slen & 0b111111; 33 bool never_same = true; 34 uint64_t mask_residual = 1; 35 mask_residual <<= count_residual; 36 mask_residual -= 1; 37 __m512i r = _mm512_set1_epi32(0); 38 39 while (count512s) { 40 int bytes_to_check = 64; 41 uint64_t mask = 0xffffffffffffffff; 42 if (count512s == 1) { 43 bytes_to_check = count_residual; 44 mask = mask_residual; 45 } 46 __m512i old_data = _mm512_mask_loadu_epi8(r, 47 mask, old_buf + i); 48 __m512i new_data = _mm512_mask_loadu_epi8(r, 49 mask, new_buf + i); 50 uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data); 51 count512s--; 52 53 bool is_same = (comp & 0x1); 54 while (bytes_to_check) { 55 if (d + 2 > dlen) { 56 return -1; 57 } 58 if (is_same) { 59 if (nzrun_len) { 60 d += uleb128_encode_small(dst + d, nzrun_len); 61 if (d + nzrun_len > dlen) { 62 return -1; 63 } 64 nzrun_start = new_buf + i - nzrun_len; 65 memcpy(dst + d, nzrun_start, nzrun_len); 66 d += nzrun_len; 67 nzrun_len = 0; 68 } 69 /* 64 data at a time for speed */ 70 if (count512s && (comp == 0xffffffffffffffff)) { 71 i += 64; 72 zrun_len += 64; 73 break; 74 } 75 never_same = false; 76 num = ctz64(~comp); 77 num = (num < bytes_to_check) ? num : bytes_to_check; 78 zrun_len += num; 79 bytes_to_check -= num; 80 comp >>= num; 81 i += num; 82 if (bytes_to_check) { 83 /* still has different data after same data */ 84 d += uleb128_encode_small(dst + d, zrun_len); 85 zrun_len = 0; 86 } else { 87 break; 88 } 89 } 90 if (never_same || zrun_len) { 91 /* 92 * never_same only acts if 93 * data begins with diff in first count512s 94 */ 95 d += uleb128_encode_small(dst + d, zrun_len); 96 zrun_len = 0; 97 never_same = false; 98 } 99 /* has diff, 64 data at a time for speed */ 100 if ((bytes_to_check == 64) && (comp == 0x0)) { 101 i += 64; 102 nzrun_len += 64; 103 break; 104 } 105 num = ctz64(comp); 106 num = (num < bytes_to_check) ? num : bytes_to_check; 107 nzrun_len += num; 108 bytes_to_check -= num; 109 comp >>= num; 110 i += num; 111 if (bytes_to_check) { 112 /* mask like 111000 */ 113 d += uleb128_encode_small(dst + d, nzrun_len); 114 /* overflow */ 115 if (d + nzrun_len > dlen) { 116 return -1; 117 } 118 nzrun_start = new_buf + i - nzrun_len; 119 memcpy(dst + d, nzrun_start, nzrun_len); 120 d += nzrun_len; 121 nzrun_len = 0; 122 is_same = true; 123 } 124 } 125 } 126 127 if (nzrun_len != 0) { 128 d += uleb128_encode_small(dst + d, nzrun_len); 129 /* overflow */ 130 if (d + nzrun_len > dlen) { 131 return -1; 132 } 133 nzrun_start = new_buf + i - nzrun_len; 134 memcpy(dst + d, nzrun_start, nzrun_len); 135 d += nzrun_len; 136 } 137 return d; 138 } 139 140 static int xbzrle_encode_buffer_int(uint8_t *old_buf, uint8_t *new_buf, 141 int slen, uint8_t *dst, int dlen); 142 143 static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int); 144 145 static void __attribute__((constructor)) init_accel(void) 146 { 147 unsigned info = cpuinfo_init(); 148 if (info & CPUINFO_AVX512BW) { 149 accel_func = xbzrle_encode_buffer_avx512; 150 } else { 151 accel_func = xbzrle_encode_buffer_int; 152 } 153 } 154 155 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, 156 uint8_t *dst, int dlen) 157 { 158 return accel_func(old_buf, new_buf, slen, dst, dlen); 159 } 160 161 #define xbzrle_encode_buffer xbzrle_encode_buffer_int 162 #endif 163 164 /* 165 page = zrun nzrun 166 | zrun nzrun page 167 168 zrun = length 169 170 nzrun = length byte... 171 172 length = uleb128 encoded integer 173 */ 174 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, 175 uint8_t *dst, int dlen) 176 { 177 uint32_t zrun_len = 0, nzrun_len = 0; 178 int d = 0, i = 0; 179 long res; 180 uint8_t *nzrun_start = NULL; 181 182 g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) % 183 sizeof(long))); 184 185 while (i < slen) { 186 /* overflow */ 187 if (d + 2 > dlen) { 188 return -1; 189 } 190 191 /* not aligned to sizeof(long) */ 192 res = (slen - i) % sizeof(long); 193 while (res && old_buf[i] == new_buf[i]) { 194 zrun_len++; 195 i++; 196 res--; 197 } 198 199 /* word at a time for speed */ 200 if (!res) { 201 while (i < slen && 202 (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) { 203 i += sizeof(long); 204 zrun_len += sizeof(long); 205 } 206 207 /* go over the rest */ 208 while (i < slen && old_buf[i] == new_buf[i]) { 209 zrun_len++; 210 i++; 211 } 212 } 213 214 /* buffer unchanged */ 215 if (zrun_len == slen) { 216 return 0; 217 } 218 219 /* skip last zero run */ 220 if (i == slen) { 221 return d; 222 } 223 224 d += uleb128_encode_small(dst + d, zrun_len); 225 226 zrun_len = 0; 227 nzrun_start = new_buf + i; 228 229 /* overflow */ 230 if (d + 2 > dlen) { 231 return -1; 232 } 233 /* not aligned to sizeof(long) */ 234 res = (slen - i) % sizeof(long); 235 while (res && old_buf[i] != new_buf[i]) { 236 i++; 237 nzrun_len++; 238 res--; 239 } 240 241 /* word at a time for speed, use of 32-bit long okay */ 242 if (!res) { 243 /* truncation to 32-bit long okay */ 244 unsigned long mask = (unsigned long)0x0101010101010101ULL; 245 while (i < slen) { 246 unsigned long xor; 247 xor = *(unsigned long *)(old_buf + i) 248 ^ *(unsigned long *)(new_buf + i); 249 if ((xor - mask) & ~xor & (mask << 7)) { 250 /* found the end of an nzrun within the current long */ 251 while (old_buf[i] != new_buf[i]) { 252 nzrun_len++; 253 i++; 254 } 255 break; 256 } else { 257 i += sizeof(long); 258 nzrun_len += sizeof(long); 259 } 260 } 261 } 262 263 d += uleb128_encode_small(dst + d, nzrun_len); 264 /* overflow */ 265 if (d + nzrun_len > dlen) { 266 return -1; 267 } 268 memcpy(dst + d, nzrun_start, nzrun_len); 269 d += nzrun_len; 270 nzrun_len = 0; 271 } 272 273 return d; 274 } 275 276 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) 277 { 278 int i = 0, d = 0; 279 int ret; 280 uint32_t count = 0; 281 282 while (i < slen) { 283 284 /* zrun */ 285 if ((slen - i) < 2) { 286 return -1; 287 } 288 289 ret = uleb128_decode_small(src + i, &count); 290 if (ret < 0 || (i && !count)) { 291 return -1; 292 } 293 i += ret; 294 d += count; 295 296 /* overflow */ 297 if (d > dlen) { 298 return -1; 299 } 300 301 /* nzrun */ 302 if ((slen - i) < 2) { 303 return -1; 304 } 305 306 ret = uleb128_decode_small(src + i, &count); 307 if (ret < 0 || !count) { 308 return -1; 309 } 310 i += ret; 311 312 /* overflow */ 313 if (d + count > dlen || i + count > slen) { 314 return -1; 315 } 316 317 memcpy(dst + d, src + i, count); 318 d += count; 319 i += count; 320 } 321 322 return d; 323 } 324