1 /* ****************************************************************** 2 * huff0 huffman decoder, 3 * part of Finite State Entropy library 4 * Copyright (c) Yann Collet, Facebook, Inc. 5 * 6 * You can contact the author at : 7 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy 8 * 9 * This source code is licensed under both the BSD-style license (found in the 10 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11 * in the COPYING file in the root directory of this source tree). 12 * You may select, at your option, one of the above-listed licenses. 13 ****************************************************************** */ 14 15 /* ************************************************************** 16 * Dependencies 17 ****************************************************************/ 18 #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ 19 #include "../common/compiler.h" 20 #include "../common/bitstream.h" /* BIT_* */ 21 #include "../common/fse.h" /* to compress headers */ 22 #define HUF_STATIC_LINKING_ONLY 23 #include "../common/huf.h" 24 #include "../common/error_private.h" 25 26 /* ************************************************************** 27 * Macros 28 ****************************************************************/ 29 30 /* These two optional macros force the use one way or another of the two 31 * Huffman decompression implementations. You can't force in both directions 32 * at the same time. 33 */ 34 #if defined(HUF_FORCE_DECOMPRESS_X1) && \ 35 defined(HUF_FORCE_DECOMPRESS_X2) 36 #error "Cannot force the use of the X1 and X2 decoders at the same time!" 37 #endif 38 39 40 /* ************************************************************** 41 * Error Management 42 ****************************************************************/ 43 #define HUF_isError ERR_isError 44 45 46 /* ************************************************************** 47 * Byte alignment for workSpace management 48 ****************************************************************/ 49 #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) 50 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) 51 52 53 /* ************************************************************** 54 * BMI2 Variant Wrappers 55 ****************************************************************/ 56 #if DYNAMIC_BMI2 57 58 #define HUF_DGEN(fn) \ 59 \ 60 static size_t fn##_default( \ 61 void* dst, size_t dstSize, \ 62 const void* cSrc, size_t cSrcSize, \ 63 const HUF_DTable* DTable) \ 64 { \ 65 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ 66 } \ 67 \ 68 static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ 69 void* dst, size_t dstSize, \ 70 const void* cSrc, size_t cSrcSize, \ 71 const HUF_DTable* DTable) \ 72 { \ 73 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ 74 } \ 75 \ 76 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ 77 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ 78 { \ 79 if (bmi2) { \ 80 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ 81 } \ 82 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ 83 } 84 85 #else 86 87 #define HUF_DGEN(fn) \ 88 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ 89 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ 90 { \ 91 (void)bmi2; \ 92 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ 93 } 94 95 #endif 96 97 98 /*-***************************/ 99 /* generic DTableDesc */ 100 /*-***************************/ 101 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; 102 103 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) 104 { 105 DTableDesc dtd; 106 ZSTD_memcpy(&dtd, table, sizeof(dtd)); 107 return dtd; 108 } 109 110 111 #ifndef HUF_FORCE_DECOMPRESS_X2 112 113 /*-***************************/ 114 /* single-symbol decoding */ 115 /*-***************************/ 116 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ 117 118 /* 119 * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at 120 * a time. 121 */ 122 static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { 123 U64 D4; 124 if (MEM_isLittleEndian()) { 125 D4 = symbol + (nbBits << 8); 126 } else { 127 D4 = (symbol << 8) + nbBits; 128 } 129 D4 *= 0x0001000100010001ULL; 130 return D4; 131 } 132 133 typedef struct { 134 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; 135 U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; 136 U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; 137 BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; 138 BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; 139 } HUF_ReadDTableX1_Workspace; 140 141 142 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) 143 { 144 return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); 145 } 146 147 size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) 148 { 149 U32 tableLog = 0; 150 U32 nbSymbols = 0; 151 size_t iSize; 152 void* const dtPtr = DTable + 1; 153 HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; 154 HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; 155 156 DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); 157 if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); 158 159 DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); 160 /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ 161 162 iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); 163 if (HUF_isError(iSize)) return iSize; 164 165 /* Table header */ 166 { DTableDesc dtd = HUF_getDTableDesc(DTable); 167 if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ 168 dtd.tableType = 0; 169 dtd.tableLog = (BYTE)tableLog; 170 ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); 171 } 172 173 /* Compute symbols and rankStart given rankVal: 174 * 175 * rankVal already contains the number of values of each weight. 176 * 177 * symbols contains the symbols ordered by weight. First are the rankVal[0] 178 * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. 179 * symbols[0] is filled (but unused) to avoid a branch. 180 * 181 * rankStart contains the offset where each rank belongs in the DTable. 182 * rankStart[0] is not filled because there are no entries in the table for 183 * weight 0. 184 */ 185 { 186 int n; 187 int nextRankStart = 0; 188 int const unroll = 4; 189 int const nLimit = (int)nbSymbols - unroll + 1; 190 for (n=0; n<(int)tableLog+1; n++) { 191 U32 const curr = nextRankStart; 192 nextRankStart += wksp->rankVal[n]; 193 wksp->rankStart[n] = curr; 194 } 195 for (n=0; n < nLimit; n += unroll) { 196 int u; 197 for (u=0; u < unroll; ++u) { 198 size_t const w = wksp->huffWeight[n+u]; 199 wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); 200 } 201 } 202 for (; n < (int)nbSymbols; ++n) { 203 size_t const w = wksp->huffWeight[n]; 204 wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; 205 } 206 } 207 208 /* fill DTable 209 * We fill all entries of each weight in order. 210 * That way length is a constant for each iteration of the outter loop. 211 * We can switch based on the length to a different inner loop which is 212 * optimized for that particular case. 213 */ 214 { 215 U32 w; 216 int symbol=wksp->rankVal[0]; 217 int rankStart=0; 218 for (w=1; w<tableLog+1; ++w) { 219 int const symbolCount = wksp->rankVal[w]; 220 int const length = (1 << w) >> 1; 221 int uStart = rankStart; 222 BYTE const nbBits = (BYTE)(tableLog + 1 - w); 223 int s; 224 int u; 225 switch (length) { 226 case 1: 227 for (s=0; s<symbolCount; ++s) { 228 HUF_DEltX1 D; 229 D.byte = wksp->symbols[symbol + s]; 230 D.nbBits = nbBits; 231 dt[uStart] = D; 232 uStart += 1; 233 } 234 break; 235 case 2: 236 for (s=0; s<symbolCount; ++s) { 237 HUF_DEltX1 D; 238 D.byte = wksp->symbols[symbol + s]; 239 D.nbBits = nbBits; 240 dt[uStart+0] = D; 241 dt[uStart+1] = D; 242 uStart += 2; 243 } 244 break; 245 case 4: 246 for (s=0; s<symbolCount; ++s) { 247 U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); 248 MEM_write64(dt + uStart, D4); 249 uStart += 4; 250 } 251 break; 252 case 8: 253 for (s=0; s<symbolCount; ++s) { 254 U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); 255 MEM_write64(dt + uStart, D4); 256 MEM_write64(dt + uStart + 4, D4); 257 uStart += 8; 258 } 259 break; 260 default: 261 for (s=0; s<symbolCount; ++s) { 262 U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); 263 for (u=0; u < length; u += 16) { 264 MEM_write64(dt + uStart + u + 0, D4); 265 MEM_write64(dt + uStart + u + 4, D4); 266 MEM_write64(dt + uStart + u + 8, D4); 267 MEM_write64(dt + uStart + u + 12, D4); 268 } 269 assert(u == length); 270 uStart += length; 271 } 272 break; 273 } 274 symbol += symbolCount; 275 rankStart += symbolCount * length; 276 } 277 } 278 return iSize; 279 } 280 281 FORCE_INLINE_TEMPLATE BYTE 282 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) 283 { 284 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ 285 BYTE const c = dt[val].byte; 286 BIT_skipBits(Dstream, dt[val].nbBits); 287 return c; 288 } 289 290 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ 291 *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) 292 293 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ 294 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 295 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) 296 297 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ 298 if (MEM_64bits()) \ 299 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) 300 301 HINT_INLINE size_t 302 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) 303 { 304 BYTE* const pStart = p; 305 306 /* up to 4 symbols at a time */ 307 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { 308 HUF_DECODE_SYMBOLX1_2(p, bitDPtr); 309 HUF_DECODE_SYMBOLX1_1(p, bitDPtr); 310 HUF_DECODE_SYMBOLX1_2(p, bitDPtr); 311 HUF_DECODE_SYMBOLX1_0(p, bitDPtr); 312 } 313 314 /* [0-3] symbols remaining */ 315 if (MEM_32bits()) 316 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) 317 HUF_DECODE_SYMBOLX1_0(p, bitDPtr); 318 319 /* no more data to retrieve from bitstream, no need to reload */ 320 while (p < pEnd) 321 HUF_DECODE_SYMBOLX1_0(p, bitDPtr); 322 323 return pEnd-pStart; 324 } 325 326 FORCE_INLINE_TEMPLATE size_t 327 HUF_decompress1X1_usingDTable_internal_body( 328 void* dst, size_t dstSize, 329 const void* cSrc, size_t cSrcSize, 330 const HUF_DTable* DTable) 331 { 332 BYTE* op = (BYTE*)dst; 333 BYTE* const oend = op + dstSize; 334 const void* dtPtr = DTable + 1; 335 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; 336 BIT_DStream_t bitD; 337 DTableDesc const dtd = HUF_getDTableDesc(DTable); 338 U32 const dtLog = dtd.tableLog; 339 340 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); 341 342 HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); 343 344 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); 345 346 return dstSize; 347 } 348 349 FORCE_INLINE_TEMPLATE size_t 350 HUF_decompress4X1_usingDTable_internal_body( 351 void* dst, size_t dstSize, 352 const void* cSrc, size_t cSrcSize, 353 const HUF_DTable* DTable) 354 { 355 /* Check */ 356 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ 357 358 { const BYTE* const istart = (const BYTE*) cSrc; 359 BYTE* const ostart = (BYTE*) dst; 360 BYTE* const oend = ostart + dstSize; 361 BYTE* const olimit = oend - 3; 362 const void* const dtPtr = DTable + 1; 363 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; 364 365 /* Init */ 366 BIT_DStream_t bitD1; 367 BIT_DStream_t bitD2; 368 BIT_DStream_t bitD3; 369 BIT_DStream_t bitD4; 370 size_t const length1 = MEM_readLE16(istart); 371 size_t const length2 = MEM_readLE16(istart+2); 372 size_t const length3 = MEM_readLE16(istart+4); 373 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); 374 const BYTE* const istart1 = istart + 6; /* jumpTable */ 375 const BYTE* const istart2 = istart1 + length1; 376 const BYTE* const istart3 = istart2 + length2; 377 const BYTE* const istart4 = istart3 + length3; 378 const size_t segmentSize = (dstSize+3) / 4; 379 BYTE* const opStart2 = ostart + segmentSize; 380 BYTE* const opStart3 = opStart2 + segmentSize; 381 BYTE* const opStart4 = opStart3 + segmentSize; 382 BYTE* op1 = ostart; 383 BYTE* op2 = opStart2; 384 BYTE* op3 = opStart3; 385 BYTE* op4 = opStart4; 386 DTableDesc const dtd = HUF_getDTableDesc(DTable); 387 U32 const dtLog = dtd.tableLog; 388 U32 endSignal = 1; 389 390 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ 391 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); 392 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); 393 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); 394 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); 395 396 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ 397 for ( ; (endSignal) & (op4 < olimit) ; ) { 398 HUF_DECODE_SYMBOLX1_2(op1, &bitD1); 399 HUF_DECODE_SYMBOLX1_2(op2, &bitD2); 400 HUF_DECODE_SYMBOLX1_2(op3, &bitD3); 401 HUF_DECODE_SYMBOLX1_2(op4, &bitD4); 402 HUF_DECODE_SYMBOLX1_1(op1, &bitD1); 403 HUF_DECODE_SYMBOLX1_1(op2, &bitD2); 404 HUF_DECODE_SYMBOLX1_1(op3, &bitD3); 405 HUF_DECODE_SYMBOLX1_1(op4, &bitD4); 406 HUF_DECODE_SYMBOLX1_2(op1, &bitD1); 407 HUF_DECODE_SYMBOLX1_2(op2, &bitD2); 408 HUF_DECODE_SYMBOLX1_2(op3, &bitD3); 409 HUF_DECODE_SYMBOLX1_2(op4, &bitD4); 410 HUF_DECODE_SYMBOLX1_0(op1, &bitD1); 411 HUF_DECODE_SYMBOLX1_0(op2, &bitD2); 412 HUF_DECODE_SYMBOLX1_0(op3, &bitD3); 413 HUF_DECODE_SYMBOLX1_0(op4, &bitD4); 414 endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; 415 endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; 416 endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; 417 endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; 418 } 419 420 /* check corruption */ 421 /* note : should not be necessary : op# advance in lock step, and we control op4. 422 * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ 423 if (op1 > opStart2) return ERROR(corruption_detected); 424 if (op2 > opStart3) return ERROR(corruption_detected); 425 if (op3 > opStart4) return ERROR(corruption_detected); 426 /* note : op4 supposed already verified within main loop */ 427 428 /* finish bitStreams one by one */ 429 HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); 430 HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); 431 HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); 432 HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); 433 434 /* check */ 435 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); 436 if (!endCheck) return ERROR(corruption_detected); } 437 438 /* decoded size */ 439 return dstSize; 440 } 441 } 442 443 444 typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, 445 const void *cSrc, 446 size_t cSrcSize, 447 const HUF_DTable *DTable); 448 449 HUF_DGEN(HUF_decompress1X1_usingDTable_internal) 450 HUF_DGEN(HUF_decompress4X1_usingDTable_internal) 451 452 453 454 size_t HUF_decompress1X1_usingDTable( 455 void* dst, size_t dstSize, 456 const void* cSrc, size_t cSrcSize, 457 const HUF_DTable* DTable) 458 { 459 DTableDesc dtd = HUF_getDTableDesc(DTable); 460 if (dtd.tableType != 0) return ERROR(GENERIC); 461 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 462 } 463 464 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, 465 const void* cSrc, size_t cSrcSize, 466 void* workSpace, size_t wkspSize) 467 { 468 const BYTE* ip = (const BYTE*) cSrc; 469 470 size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); 471 if (HUF_isError(hSize)) return hSize; 472 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 473 ip += hSize; cSrcSize -= hSize; 474 475 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); 476 } 477 478 479 size_t HUF_decompress4X1_usingDTable( 480 void* dst, size_t dstSize, 481 const void* cSrc, size_t cSrcSize, 482 const HUF_DTable* DTable) 483 { 484 DTableDesc dtd = HUF_getDTableDesc(DTable); 485 if (dtd.tableType != 0) return ERROR(GENERIC); 486 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 487 } 488 489 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, 490 const void* cSrc, size_t cSrcSize, 491 void* workSpace, size_t wkspSize, int bmi2) 492 { 493 const BYTE* ip = (const BYTE*) cSrc; 494 495 size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 496 if (HUF_isError(hSize)) return hSize; 497 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 498 ip += hSize; cSrcSize -= hSize; 499 500 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 501 } 502 503 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 504 const void* cSrc, size_t cSrcSize, 505 void* workSpace, size_t wkspSize) 506 { 507 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); 508 } 509 510 511 #endif /* HUF_FORCE_DECOMPRESS_X2 */ 512 513 514 #ifndef HUF_FORCE_DECOMPRESS_X1 515 516 /* *************************/ 517 /* double-symbols decoding */ 518 /* *************************/ 519 520 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ 521 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; 522 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; 523 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; 524 525 526 /* HUF_fillDTableX2Level2() : 527 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ 528 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, 529 const U32* rankValOrigin, const int minWeight, 530 const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, 531 U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize) 532 { 533 HUF_DEltX2 DElt; 534 U32* rankVal = wksp; 535 536 assert(wkspSize >= HUF_TABLELOG_MAX + 1); 537 (void)wkspSize; 538 /* get pre-calculated rankVal */ 539 ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1)); 540 541 /* fill skipped values */ 542 if (minWeight>1) { 543 U32 i, skipSize = rankVal[minWeight]; 544 MEM_writeLE16(&(DElt.sequence), baseSeq); 545 DElt.nbBits = (BYTE)(consumed); 546 DElt.length = 1; 547 for (i = 0; i < skipSize; i++) 548 DTable[i] = DElt; 549 } 550 551 /* fill DTable */ 552 { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */ 553 const U32 symbol = sortedSymbols[s].symbol; 554 const U32 weight = sortedSymbols[s].weight; 555 const U32 nbBits = nbBitsBaseline - weight; 556 const U32 length = 1 << (sizeLog-nbBits); 557 const U32 start = rankVal[weight]; 558 U32 i = start; 559 const U32 end = start + length; 560 561 MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8))); 562 DElt.nbBits = (BYTE)(nbBits + consumed); 563 DElt.length = 2; 564 do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */ 565 566 rankVal[weight] += length; 567 } } 568 } 569 570 571 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, 572 const sortedSymbol_t* sortedList, const U32 sortedListSize, 573 const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, 574 const U32 nbBitsBaseline, U32* wksp, size_t wkspSize) 575 { 576 U32* rankVal = wksp; 577 const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ 578 const U32 minBits = nbBitsBaseline - maxWeight; 579 U32 s; 580 581 assert(wkspSize >= HUF_TABLELOG_MAX + 1); 582 wksp += HUF_TABLELOG_MAX + 1; 583 wkspSize -= HUF_TABLELOG_MAX + 1; 584 585 ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1)); 586 587 /* fill DTable */ 588 for (s=0; s<sortedListSize; s++) { 589 const U16 symbol = sortedList[s].symbol; 590 const U32 weight = sortedList[s].weight; 591 const U32 nbBits = nbBitsBaseline - weight; 592 const U32 start = rankVal[weight]; 593 const U32 length = 1 << (targetLog-nbBits); 594 595 if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */ 596 U32 sortedRank; 597 int minWeight = nbBits + scaleLog; 598 if (minWeight < 1) minWeight = 1; 599 sortedRank = rankStart[minWeight]; 600 HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, 601 rankValOrigin[nbBits], minWeight, 602 sortedList+sortedRank, sortedListSize-sortedRank, 603 nbBitsBaseline, symbol, wksp, wkspSize); 604 } else { 605 HUF_DEltX2 DElt; 606 MEM_writeLE16(&(DElt.sequence), symbol); 607 DElt.nbBits = (BYTE)(nbBits); 608 DElt.length = 1; 609 { U32 const end = start + length; 610 U32 u; 611 for (u = start; u < end; u++) DTable[u] = DElt; 612 } } 613 rankVal[weight] += length; 614 } 615 } 616 617 typedef struct { 618 rankValCol_t rankVal[HUF_TABLELOG_MAX]; 619 U32 rankStats[HUF_TABLELOG_MAX + 1]; 620 U32 rankStart0[HUF_TABLELOG_MAX + 2]; 621 sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; 622 BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; 623 U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; 624 } HUF_ReadDTableX2_Workspace; 625 626 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, 627 const void* src, size_t srcSize, 628 void* workSpace, size_t wkspSize) 629 { 630 U32 tableLog, maxW, sizeOfSort, nbSymbols; 631 DTableDesc dtd = HUF_getDTableDesc(DTable); 632 U32 const maxTableLog = dtd.maxTableLog; 633 size_t iSize; 634 void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ 635 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; 636 U32 *rankStart; 637 638 HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace; 639 640 if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC); 641 642 rankStart = wksp->rankStart0 + 1; 643 ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats)); 644 ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0)); 645 646 DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ 647 if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); 648 /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ 649 650 iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0); 651 if (HUF_isError(iSize)) return iSize; 652 653 /* check result */ 654 if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ 655 656 /* find maxWeight */ 657 for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ 658 659 /* Get start index of each weight */ 660 { U32 w, nextRankStart = 0; 661 for (w=1; w<maxW+1; w++) { 662 U32 curr = nextRankStart; 663 nextRankStart += wksp->rankStats[w]; 664 rankStart[w] = curr; 665 } 666 rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ 667 sizeOfSort = nextRankStart; 668 } 669 670 /* sort symbols by weight */ 671 { U32 s; 672 for (s=0; s<nbSymbols; s++) { 673 U32 const w = wksp->weightList[s]; 674 U32 const r = rankStart[w]++; 675 wksp->sortedSymbol[r].symbol = (BYTE)s; 676 wksp->sortedSymbol[r].weight = (BYTE)w; 677 } 678 rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ 679 } 680 681 /* Build rankVal */ 682 { U32* const rankVal0 = wksp->rankVal[0]; 683 { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */ 684 U32 nextRankVal = 0; 685 U32 w; 686 for (w=1; w<maxW+1; w++) { 687 U32 curr = nextRankVal; 688 nextRankVal += wksp->rankStats[w] << (w+rescale); 689 rankVal0[w] = curr; 690 } } 691 { U32 const minBits = tableLog+1 - maxW; 692 U32 consumed; 693 for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) { 694 U32* const rankValPtr = wksp->rankVal[consumed]; 695 U32 w; 696 for (w = 1; w < maxW+1; w++) { 697 rankValPtr[w] = rankVal0[w] >> consumed; 698 } } } } 699 700 HUF_fillDTableX2(dt, maxTableLog, 701 wksp->sortedSymbol, sizeOfSort, 702 wksp->rankStart0, wksp->rankVal, maxW, 703 tableLog+1, 704 wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32)); 705 706 dtd.tableLog = (BYTE)maxTableLog; 707 dtd.tableType = 1; 708 ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); 709 return iSize; 710 } 711 712 713 FORCE_INLINE_TEMPLATE U32 714 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) 715 { 716 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ 717 ZSTD_memcpy(op, dt+val, 2); 718 BIT_skipBits(DStream, dt[val].nbBits); 719 return dt[val].length; 720 } 721 722 FORCE_INLINE_TEMPLATE U32 723 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) 724 { 725 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ 726 ZSTD_memcpy(op, dt+val, 1); 727 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); 728 else { 729 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { 730 BIT_skipBits(DStream, dt[val].nbBits); 731 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) 732 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ 733 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); 734 } } 735 return 1; 736 } 737 738 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ 739 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 740 741 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ 742 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 743 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 744 745 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ 746 if (MEM_64bits()) \ 747 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 748 749 HINT_INLINE size_t 750 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, 751 const HUF_DEltX2* const dt, const U32 dtLog) 752 { 753 BYTE* const pStart = p; 754 755 /* up to 8 symbols at a time */ 756 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { 757 HUF_DECODE_SYMBOLX2_2(p, bitDPtr); 758 HUF_DECODE_SYMBOLX2_1(p, bitDPtr); 759 HUF_DECODE_SYMBOLX2_2(p, bitDPtr); 760 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 761 } 762 763 /* closer to end : up to 2 symbols at a time */ 764 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) 765 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 766 767 while (p <= pEnd-2) 768 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ 769 770 if (p < pEnd) 771 p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); 772 773 return p-pStart; 774 } 775 776 FORCE_INLINE_TEMPLATE size_t 777 HUF_decompress1X2_usingDTable_internal_body( 778 void* dst, size_t dstSize, 779 const void* cSrc, size_t cSrcSize, 780 const HUF_DTable* DTable) 781 { 782 BIT_DStream_t bitD; 783 784 /* Init */ 785 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); 786 787 /* decode */ 788 { BYTE* const ostart = (BYTE*) dst; 789 BYTE* const oend = ostart + dstSize; 790 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ 791 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; 792 DTableDesc const dtd = HUF_getDTableDesc(DTable); 793 HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); 794 } 795 796 /* check */ 797 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); 798 799 /* decoded size */ 800 return dstSize; 801 } 802 803 FORCE_INLINE_TEMPLATE size_t 804 HUF_decompress4X2_usingDTable_internal_body( 805 void* dst, size_t dstSize, 806 const void* cSrc, size_t cSrcSize, 807 const HUF_DTable* DTable) 808 { 809 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ 810 811 { const BYTE* const istart = (const BYTE*) cSrc; 812 BYTE* const ostart = (BYTE*) dst; 813 BYTE* const oend = ostart + dstSize; 814 BYTE* const olimit = oend - (sizeof(size_t)-1); 815 const void* const dtPtr = DTable+1; 816 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; 817 818 /* Init */ 819 BIT_DStream_t bitD1; 820 BIT_DStream_t bitD2; 821 BIT_DStream_t bitD3; 822 BIT_DStream_t bitD4; 823 size_t const length1 = MEM_readLE16(istart); 824 size_t const length2 = MEM_readLE16(istart+2); 825 size_t const length3 = MEM_readLE16(istart+4); 826 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); 827 const BYTE* const istart1 = istart + 6; /* jumpTable */ 828 const BYTE* const istart2 = istart1 + length1; 829 const BYTE* const istart3 = istart2 + length2; 830 const BYTE* const istart4 = istart3 + length3; 831 size_t const segmentSize = (dstSize+3) / 4; 832 BYTE* const opStart2 = ostart + segmentSize; 833 BYTE* const opStart3 = opStart2 + segmentSize; 834 BYTE* const opStart4 = opStart3 + segmentSize; 835 BYTE* op1 = ostart; 836 BYTE* op2 = opStart2; 837 BYTE* op3 = opStart3; 838 BYTE* op4 = opStart4; 839 U32 endSignal = 1; 840 DTableDesc const dtd = HUF_getDTableDesc(DTable); 841 U32 const dtLog = dtd.tableLog; 842 843 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ 844 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); 845 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); 846 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); 847 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); 848 849 /* 16-32 symbols per loop (4-8 symbols per stream) */ 850 for ( ; (endSignal) & (op4 < olimit); ) { 851 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) 852 HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 853 HUF_DECODE_SYMBOLX2_1(op1, &bitD1); 854 HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 855 HUF_DECODE_SYMBOLX2_0(op1, &bitD1); 856 HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 857 HUF_DECODE_SYMBOLX2_1(op2, &bitD2); 858 HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 859 HUF_DECODE_SYMBOLX2_0(op2, &bitD2); 860 endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; 861 endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; 862 HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 863 HUF_DECODE_SYMBOLX2_1(op3, &bitD3); 864 HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 865 HUF_DECODE_SYMBOLX2_0(op3, &bitD3); 866 HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 867 HUF_DECODE_SYMBOLX2_1(op4, &bitD4); 868 HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 869 HUF_DECODE_SYMBOLX2_0(op4, &bitD4); 870 endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; 871 endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; 872 #else 873 HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 874 HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 875 HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 876 HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 877 HUF_DECODE_SYMBOLX2_1(op1, &bitD1); 878 HUF_DECODE_SYMBOLX2_1(op2, &bitD2); 879 HUF_DECODE_SYMBOLX2_1(op3, &bitD3); 880 HUF_DECODE_SYMBOLX2_1(op4, &bitD4); 881 HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 882 HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 883 HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 884 HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 885 HUF_DECODE_SYMBOLX2_0(op1, &bitD1); 886 HUF_DECODE_SYMBOLX2_0(op2, &bitD2); 887 HUF_DECODE_SYMBOLX2_0(op3, &bitD3); 888 HUF_DECODE_SYMBOLX2_0(op4, &bitD4); 889 endSignal = (U32)LIKELY((U32) 890 (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) 891 & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) 892 & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) 893 & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); 894 #endif 895 } 896 897 /* check corruption */ 898 if (op1 > opStart2) return ERROR(corruption_detected); 899 if (op2 > opStart3) return ERROR(corruption_detected); 900 if (op3 > opStart4) return ERROR(corruption_detected); 901 /* note : op4 already verified within main loop */ 902 903 /* finish bitStreams one by one */ 904 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); 905 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); 906 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); 907 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); 908 909 /* check */ 910 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); 911 if (!endCheck) return ERROR(corruption_detected); } 912 913 /* decoded size */ 914 return dstSize; 915 } 916 } 917 918 HUF_DGEN(HUF_decompress1X2_usingDTable_internal) 919 HUF_DGEN(HUF_decompress4X2_usingDTable_internal) 920 921 size_t HUF_decompress1X2_usingDTable( 922 void* dst, size_t dstSize, 923 const void* cSrc, size_t cSrcSize, 924 const HUF_DTable* DTable) 925 { 926 DTableDesc dtd = HUF_getDTableDesc(DTable); 927 if (dtd.tableType != 1) return ERROR(GENERIC); 928 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 929 } 930 931 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, 932 const void* cSrc, size_t cSrcSize, 933 void* workSpace, size_t wkspSize) 934 { 935 const BYTE* ip = (const BYTE*) cSrc; 936 937 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, 938 workSpace, wkspSize); 939 if (HUF_isError(hSize)) return hSize; 940 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 941 ip += hSize; cSrcSize -= hSize; 942 943 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); 944 } 945 946 947 size_t HUF_decompress4X2_usingDTable( 948 void* dst, size_t dstSize, 949 const void* cSrc, size_t cSrcSize, 950 const HUF_DTable* DTable) 951 { 952 DTableDesc dtd = HUF_getDTableDesc(DTable); 953 if (dtd.tableType != 1) return ERROR(GENERIC); 954 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 955 } 956 957 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, 958 const void* cSrc, size_t cSrcSize, 959 void* workSpace, size_t wkspSize, int bmi2) 960 { 961 const BYTE* ip = (const BYTE*) cSrc; 962 963 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, 964 workSpace, wkspSize); 965 if (HUF_isError(hSize)) return hSize; 966 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 967 ip += hSize; cSrcSize -= hSize; 968 969 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 970 } 971 972 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 973 const void* cSrc, size_t cSrcSize, 974 void* workSpace, size_t wkspSize) 975 { 976 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); 977 } 978 979 980 #endif /* HUF_FORCE_DECOMPRESS_X1 */ 981 982 983 /* ***********************************/ 984 /* Universal decompression selectors */ 985 /* ***********************************/ 986 987 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, 988 const void* cSrc, size_t cSrcSize, 989 const HUF_DTable* DTable) 990 { 991 DTableDesc const dtd = HUF_getDTableDesc(DTable); 992 #if defined(HUF_FORCE_DECOMPRESS_X1) 993 (void)dtd; 994 assert(dtd.tableType == 0); 995 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 996 #elif defined(HUF_FORCE_DECOMPRESS_X2) 997 (void)dtd; 998 assert(dtd.tableType == 1); 999 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1000 #else 1001 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 1002 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1003 #endif 1004 } 1005 1006 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, 1007 const void* cSrc, size_t cSrcSize, 1008 const HUF_DTable* DTable) 1009 { 1010 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1011 #if defined(HUF_FORCE_DECOMPRESS_X1) 1012 (void)dtd; 1013 assert(dtd.tableType == 0); 1014 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1015 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1016 (void)dtd; 1017 assert(dtd.tableType == 1); 1018 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1019 #else 1020 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 1021 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1022 #endif 1023 } 1024 1025 1026 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) 1027 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; 1028 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = 1029 { 1030 /* single, double, quad */ 1031 {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ 1032 {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ 1033 {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ 1034 {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ 1035 {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ 1036 {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ 1037 {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ 1038 {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ 1039 {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ 1040 {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ 1041 {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ 1042 {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ 1043 {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ 1044 {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ 1045 {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ 1046 {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ 1047 }; 1048 #endif 1049 1050 /* HUF_selectDecoder() : 1051 * Tells which decoder is likely to decode faster, 1052 * based on a set of pre-computed metrics. 1053 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . 1054 * Assumption : 0 < dstSize <= 128 KB */ 1055 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) 1056 { 1057 assert(dstSize > 0); 1058 assert(dstSize <= 128*1024); 1059 #if defined(HUF_FORCE_DECOMPRESS_X1) 1060 (void)dstSize; 1061 (void)cSrcSize; 1062 return 0; 1063 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1064 (void)dstSize; 1065 (void)cSrcSize; 1066 return 1; 1067 #else 1068 /* decoder timing evaluation */ 1069 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ 1070 U32 const D256 = (U32)(dstSize >> 8); 1071 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); 1072 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); 1073 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ 1074 return DTime1 < DTime0; 1075 } 1076 #endif 1077 } 1078 1079 1080 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, 1081 size_t dstSize, const void* cSrc, 1082 size_t cSrcSize, void* workSpace, 1083 size_t wkspSize) 1084 { 1085 /* validation checks */ 1086 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1087 if (cSrcSize == 0) return ERROR(corruption_detected); 1088 1089 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1090 #if defined(HUF_FORCE_DECOMPRESS_X1) 1091 (void)algoNb; 1092 assert(algoNb == 0); 1093 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1094 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1095 (void)algoNb; 1096 assert(algoNb == 1); 1097 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1098 #else 1099 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1100 cSrcSize, workSpace, wkspSize): 1101 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1102 #endif 1103 } 1104 } 1105 1106 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 1107 const void* cSrc, size_t cSrcSize, 1108 void* workSpace, size_t wkspSize) 1109 { 1110 /* validation checks */ 1111 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1112 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ 1113 if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ 1114 if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ 1115 1116 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1117 #if defined(HUF_FORCE_DECOMPRESS_X1) 1118 (void)algoNb; 1119 assert(algoNb == 0); 1120 return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, 1121 cSrcSize, workSpace, wkspSize); 1122 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1123 (void)algoNb; 1124 assert(algoNb == 1); 1125 return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1126 cSrcSize, workSpace, wkspSize); 1127 #else 1128 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1129 cSrcSize, workSpace, wkspSize): 1130 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, 1131 cSrcSize, workSpace, wkspSize); 1132 #endif 1133 } 1134 } 1135 1136 1137 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1138 { 1139 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1140 #if defined(HUF_FORCE_DECOMPRESS_X1) 1141 (void)dtd; 1142 assert(dtd.tableType == 0); 1143 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1144 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1145 (void)dtd; 1146 assert(dtd.tableType == 1); 1147 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1148 #else 1149 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1150 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1151 #endif 1152 } 1153 1154 #ifndef HUF_FORCE_DECOMPRESS_X2 1155 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1156 { 1157 const BYTE* ip = (const BYTE*) cSrc; 1158 1159 size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1160 if (HUF_isError(hSize)) return hSize; 1161 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 1162 ip += hSize; cSrcSize -= hSize; 1163 1164 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 1165 } 1166 #endif 1167 1168 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1169 { 1170 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1171 #if defined(HUF_FORCE_DECOMPRESS_X1) 1172 (void)dtd; 1173 assert(dtd.tableType == 0); 1174 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1175 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1176 (void)dtd; 1177 assert(dtd.tableType == 1); 1178 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1179 #else 1180 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1181 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1182 #endif 1183 } 1184 1185 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1186 { 1187 /* validation checks */ 1188 if (dstSize == 0) return ERROR(dstSize_tooSmall); 1189 if (cSrcSize == 0) return ERROR(corruption_detected); 1190 1191 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1192 #if defined(HUF_FORCE_DECOMPRESS_X1) 1193 (void)algoNb; 1194 assert(algoNb == 0); 1195 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1196 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1197 (void)algoNb; 1198 assert(algoNb == 1); 1199 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1200 #else 1201 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : 1202 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1203 #endif 1204 } 1205 } 1206 1207