1e0c1b49fSNick Terrell /* ****************************************************************** 2e0c1b49fSNick Terrell * huff0 huffman decoder, 3e0c1b49fSNick Terrell * part of Finite State Entropy library 4e0c1b49fSNick Terrell * Copyright (c) Yann Collet, Facebook, Inc. 5e0c1b49fSNick Terrell * 6e0c1b49fSNick Terrell * You can contact the author at : 7e0c1b49fSNick Terrell * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy 8e0c1b49fSNick Terrell * 9e0c1b49fSNick Terrell * This source code is licensed under both the BSD-style license (found in the 10e0c1b49fSNick Terrell * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11e0c1b49fSNick Terrell * in the COPYING file in the root directory of this source tree). 12e0c1b49fSNick Terrell * You may select, at your option, one of the above-listed licenses. 13e0c1b49fSNick Terrell ****************************************************************** */ 14e0c1b49fSNick Terrell 15e0c1b49fSNick Terrell /* ************************************************************** 16e0c1b49fSNick Terrell * Dependencies 17e0c1b49fSNick Terrell ****************************************************************/ 18e0c1b49fSNick Terrell #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ 19e0c1b49fSNick Terrell #include "../common/compiler.h" 20e0c1b49fSNick Terrell #include "../common/bitstream.h" /* BIT_* */ 21e0c1b49fSNick Terrell #include "../common/fse.h" /* to compress headers */ 22e0c1b49fSNick Terrell #define HUF_STATIC_LINKING_ONLY 23e0c1b49fSNick Terrell #include "../common/huf.h" 24e0c1b49fSNick Terrell #include "../common/error_private.h" 25*2aa14b1aSNick Terrell #include "../common/zstd_internal.h" 26*2aa14b1aSNick Terrell 27*2aa14b1aSNick Terrell /* ************************************************************** 28*2aa14b1aSNick Terrell * Constants 29*2aa14b1aSNick Terrell ****************************************************************/ 30*2aa14b1aSNick Terrell 31*2aa14b1aSNick Terrell #define HUF_DECODER_FAST_TABLELOG 11 32e0c1b49fSNick Terrell 33e0c1b49fSNick Terrell /* ************************************************************** 34e0c1b49fSNick Terrell * Macros 35e0c1b49fSNick Terrell ****************************************************************/ 36e0c1b49fSNick Terrell 37e0c1b49fSNick Terrell /* These two optional macros force the use one way or another of the two 38e0c1b49fSNick Terrell * Huffman decompression implementations. You can't force in both directions 39e0c1b49fSNick Terrell * at the same time. 40e0c1b49fSNick Terrell */ 41e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1) && \ 42e0c1b49fSNick Terrell defined(HUF_FORCE_DECOMPRESS_X2) 43e0c1b49fSNick Terrell #error "Cannot force the use of the X1 and X2 decoders at the same time!" 44e0c1b49fSNick Terrell #endif 45e0c1b49fSNick Terrell 46*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2 47*2aa14b1aSNick Terrell # define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE 48*2aa14b1aSNick Terrell #else 49*2aa14b1aSNick Terrell # define HUF_ASM_X86_64_BMI2_ATTRS 50*2aa14b1aSNick Terrell #endif 51*2aa14b1aSNick Terrell 52*2aa14b1aSNick Terrell #define HUF_EXTERN_C 53*2aa14b1aSNick Terrell #define HUF_ASM_DECL HUF_EXTERN_C 54*2aa14b1aSNick Terrell 55*2aa14b1aSNick Terrell #if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) 56*2aa14b1aSNick Terrell # define HUF_NEED_BMI2_FUNCTION 1 57*2aa14b1aSNick Terrell #else 58*2aa14b1aSNick Terrell # define HUF_NEED_BMI2_FUNCTION 0 59*2aa14b1aSNick Terrell #endif 60*2aa14b1aSNick Terrell 61*2aa14b1aSNick Terrell #if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) 62*2aa14b1aSNick Terrell # define HUF_NEED_DEFAULT_FUNCTION 1 63*2aa14b1aSNick Terrell #else 64*2aa14b1aSNick Terrell # define HUF_NEED_DEFAULT_FUNCTION 0 65*2aa14b1aSNick Terrell #endif 66e0c1b49fSNick Terrell 67e0c1b49fSNick Terrell /* ************************************************************** 68e0c1b49fSNick Terrell * Error Management 69e0c1b49fSNick Terrell ****************************************************************/ 70e0c1b49fSNick Terrell #define HUF_isError ERR_isError 71e0c1b49fSNick Terrell 72e0c1b49fSNick Terrell 73e0c1b49fSNick Terrell /* ************************************************************** 74e0c1b49fSNick Terrell * Byte alignment for workSpace management 75e0c1b49fSNick Terrell ****************************************************************/ 76e0c1b49fSNick Terrell #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) 77e0c1b49fSNick Terrell #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) 78e0c1b49fSNick Terrell 79e0c1b49fSNick Terrell 80e0c1b49fSNick Terrell /* ************************************************************** 81e0c1b49fSNick Terrell * BMI2 Variant Wrappers 82e0c1b49fSNick Terrell ****************************************************************/ 83e0c1b49fSNick Terrell #if DYNAMIC_BMI2 84e0c1b49fSNick Terrell 85e0c1b49fSNick Terrell #define HUF_DGEN(fn) \ 86e0c1b49fSNick Terrell \ 87e0c1b49fSNick Terrell static size_t fn##_default( \ 88e0c1b49fSNick Terrell void* dst, size_t dstSize, \ 89e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, \ 90e0c1b49fSNick Terrell const HUF_DTable* DTable) \ 91e0c1b49fSNick Terrell { \ 92e0c1b49fSNick Terrell return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ 93e0c1b49fSNick Terrell } \ 94e0c1b49fSNick Terrell \ 95*2aa14b1aSNick Terrell static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \ 96e0c1b49fSNick Terrell void* dst, size_t dstSize, \ 97e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, \ 98e0c1b49fSNick Terrell const HUF_DTable* DTable) \ 99e0c1b49fSNick Terrell { \ 100e0c1b49fSNick Terrell return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ 101e0c1b49fSNick Terrell } \ 102e0c1b49fSNick Terrell \ 103e0c1b49fSNick Terrell static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ 104e0c1b49fSNick Terrell size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ 105e0c1b49fSNick Terrell { \ 106e0c1b49fSNick Terrell if (bmi2) { \ 107e0c1b49fSNick Terrell return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ 108e0c1b49fSNick Terrell } \ 109e0c1b49fSNick Terrell return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ 110e0c1b49fSNick Terrell } 111e0c1b49fSNick Terrell 112e0c1b49fSNick Terrell #else 113e0c1b49fSNick Terrell 114e0c1b49fSNick Terrell #define HUF_DGEN(fn) \ 115e0c1b49fSNick Terrell static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ 116e0c1b49fSNick Terrell size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ 117e0c1b49fSNick Terrell { \ 118e0c1b49fSNick Terrell (void)bmi2; \ 119e0c1b49fSNick Terrell return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ 120e0c1b49fSNick Terrell } 121e0c1b49fSNick Terrell 122e0c1b49fSNick Terrell #endif 123e0c1b49fSNick Terrell 124e0c1b49fSNick Terrell 125e0c1b49fSNick Terrell /*-***************************/ 126e0c1b49fSNick Terrell /* generic DTableDesc */ 127e0c1b49fSNick Terrell /*-***************************/ 128e0c1b49fSNick Terrell typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; 129e0c1b49fSNick Terrell 130e0c1b49fSNick Terrell static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) 131e0c1b49fSNick Terrell { 132e0c1b49fSNick Terrell DTableDesc dtd; 133e0c1b49fSNick Terrell ZSTD_memcpy(&dtd, table, sizeof(dtd)); 134e0c1b49fSNick Terrell return dtd; 135e0c1b49fSNick Terrell } 136e0c1b49fSNick Terrell 137*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 138*2aa14b1aSNick Terrell 139*2aa14b1aSNick Terrell static size_t HUF_initDStream(BYTE const* ip) { 140*2aa14b1aSNick Terrell BYTE const lastByte = ip[7]; 141*2aa14b1aSNick Terrell size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; 142*2aa14b1aSNick Terrell size_t const value = MEM_readLEST(ip) | 1; 143*2aa14b1aSNick Terrell assert(bitsConsumed <= 8); 144*2aa14b1aSNick Terrell return value << bitsConsumed; 145*2aa14b1aSNick Terrell } 146*2aa14b1aSNick Terrell typedef struct { 147*2aa14b1aSNick Terrell BYTE const* ip[4]; 148*2aa14b1aSNick Terrell BYTE* op[4]; 149*2aa14b1aSNick Terrell U64 bits[4]; 150*2aa14b1aSNick Terrell void const* dt; 151*2aa14b1aSNick Terrell BYTE const* ilimit; 152*2aa14b1aSNick Terrell BYTE* oend; 153*2aa14b1aSNick Terrell BYTE const* iend[4]; 154*2aa14b1aSNick Terrell } HUF_DecompressAsmArgs; 155*2aa14b1aSNick Terrell 156*2aa14b1aSNick Terrell /* 157*2aa14b1aSNick Terrell * Initializes args for the asm decoding loop. 158*2aa14b1aSNick Terrell * @returns 0 on success 159*2aa14b1aSNick Terrell * 1 if the fallback implementation should be used. 160*2aa14b1aSNick Terrell * Or an error code on failure. 161*2aa14b1aSNick Terrell */ 162*2aa14b1aSNick Terrell static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable) 163*2aa14b1aSNick Terrell { 164*2aa14b1aSNick Terrell void const* dt = DTable + 1; 165*2aa14b1aSNick Terrell U32 const dtLog = HUF_getDTableDesc(DTable).tableLog; 166*2aa14b1aSNick Terrell 167*2aa14b1aSNick Terrell const BYTE* const ilimit = (const BYTE*)src + 6 + 8; 168*2aa14b1aSNick Terrell 169*2aa14b1aSNick Terrell BYTE* const oend = (BYTE*)dst + dstSize; 170*2aa14b1aSNick Terrell 171*2aa14b1aSNick Terrell /* The following condition is false on x32 platform, 172*2aa14b1aSNick Terrell * but HUF_asm is not compatible with this ABI */ 173*2aa14b1aSNick Terrell if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1; 174*2aa14b1aSNick Terrell 175*2aa14b1aSNick Terrell /* strict minimum : jump table + 1 byte per stream */ 176*2aa14b1aSNick Terrell if (srcSize < 10) 177*2aa14b1aSNick Terrell return ERROR(corruption_detected); 178*2aa14b1aSNick Terrell 179*2aa14b1aSNick Terrell /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers. 180*2aa14b1aSNick Terrell * If table log is not correct at this point, fallback to the old decoder. 181*2aa14b1aSNick Terrell * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder. 182*2aa14b1aSNick Terrell */ 183*2aa14b1aSNick Terrell if (dtLog != HUF_DECODER_FAST_TABLELOG) 184*2aa14b1aSNick Terrell return 1; 185*2aa14b1aSNick Terrell 186*2aa14b1aSNick Terrell /* Read the jump table. */ 187*2aa14b1aSNick Terrell { 188*2aa14b1aSNick Terrell const BYTE* const istart = (const BYTE*)src; 189*2aa14b1aSNick Terrell size_t const length1 = MEM_readLE16(istart); 190*2aa14b1aSNick Terrell size_t const length2 = MEM_readLE16(istart+2); 191*2aa14b1aSNick Terrell size_t const length3 = MEM_readLE16(istart+4); 192*2aa14b1aSNick Terrell size_t const length4 = srcSize - (length1 + length2 + length3 + 6); 193*2aa14b1aSNick Terrell args->iend[0] = istart + 6; /* jumpTable */ 194*2aa14b1aSNick Terrell args->iend[1] = args->iend[0] + length1; 195*2aa14b1aSNick Terrell args->iend[2] = args->iend[1] + length2; 196*2aa14b1aSNick Terrell args->iend[3] = args->iend[2] + length3; 197*2aa14b1aSNick Terrell 198*2aa14b1aSNick Terrell /* HUF_initDStream() requires this, and this small of an input 199*2aa14b1aSNick Terrell * won't benefit from the ASM loop anyways. 200*2aa14b1aSNick Terrell * length1 must be >= 16 so that ip[0] >= ilimit before the loop 201*2aa14b1aSNick Terrell * starts. 202*2aa14b1aSNick Terrell */ 203*2aa14b1aSNick Terrell if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8) 204*2aa14b1aSNick Terrell return 1; 205*2aa14b1aSNick Terrell if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */ 206*2aa14b1aSNick Terrell } 207*2aa14b1aSNick Terrell /* ip[] contains the position that is currently loaded into bits[]. */ 208*2aa14b1aSNick Terrell args->ip[0] = args->iend[1] - sizeof(U64); 209*2aa14b1aSNick Terrell args->ip[1] = args->iend[2] - sizeof(U64); 210*2aa14b1aSNick Terrell args->ip[2] = args->iend[3] - sizeof(U64); 211*2aa14b1aSNick Terrell args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64); 212*2aa14b1aSNick Terrell 213*2aa14b1aSNick Terrell /* op[] contains the output pointers. */ 214*2aa14b1aSNick Terrell args->op[0] = (BYTE*)dst; 215*2aa14b1aSNick Terrell args->op[1] = args->op[0] + (dstSize+3)/4; 216*2aa14b1aSNick Terrell args->op[2] = args->op[1] + (dstSize+3)/4; 217*2aa14b1aSNick Terrell args->op[3] = args->op[2] + (dstSize+3)/4; 218*2aa14b1aSNick Terrell 219*2aa14b1aSNick Terrell /* No point to call the ASM loop for tiny outputs. */ 220*2aa14b1aSNick Terrell if (args->op[3] >= oend) 221*2aa14b1aSNick Terrell return 1; 222*2aa14b1aSNick Terrell 223*2aa14b1aSNick Terrell /* bits[] is the bit container. 224*2aa14b1aSNick Terrell * It is read from the MSB down to the LSB. 225*2aa14b1aSNick Terrell * It is shifted left as it is read, and zeros are 226*2aa14b1aSNick Terrell * shifted in. After the lowest valid bit a 1 is 227*2aa14b1aSNick Terrell * set, so that CountTrailingZeros(bits[]) can be used 228*2aa14b1aSNick Terrell * to count how many bits we've consumed. 229*2aa14b1aSNick Terrell */ 230*2aa14b1aSNick Terrell args->bits[0] = HUF_initDStream(args->ip[0]); 231*2aa14b1aSNick Terrell args->bits[1] = HUF_initDStream(args->ip[1]); 232*2aa14b1aSNick Terrell args->bits[2] = HUF_initDStream(args->ip[2]); 233*2aa14b1aSNick Terrell args->bits[3] = HUF_initDStream(args->ip[3]); 234*2aa14b1aSNick Terrell 235*2aa14b1aSNick Terrell /* If ip[] >= ilimit, it is guaranteed to be safe to 236*2aa14b1aSNick Terrell * reload bits[]. It may be beyond its section, but is 237*2aa14b1aSNick Terrell * guaranteed to be valid (>= istart). 238*2aa14b1aSNick Terrell */ 239*2aa14b1aSNick Terrell args->ilimit = ilimit; 240*2aa14b1aSNick Terrell 241*2aa14b1aSNick Terrell args->oend = oend; 242*2aa14b1aSNick Terrell args->dt = dt; 243*2aa14b1aSNick Terrell 244*2aa14b1aSNick Terrell return 0; 245*2aa14b1aSNick Terrell } 246*2aa14b1aSNick Terrell 247*2aa14b1aSNick Terrell static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd) 248*2aa14b1aSNick Terrell { 249*2aa14b1aSNick Terrell /* Validate that we haven't overwritten. */ 250*2aa14b1aSNick Terrell if (args->op[stream] > segmentEnd) 251*2aa14b1aSNick Terrell return ERROR(corruption_detected); 252*2aa14b1aSNick Terrell /* Validate that we haven't read beyond iend[]. 253*2aa14b1aSNick Terrell * Note that ip[] may be < iend[] because the MSB is 254*2aa14b1aSNick Terrell * the next bit to read, and we may have consumed 100% 255*2aa14b1aSNick Terrell * of the stream, so down to iend[i] - 8 is valid. 256*2aa14b1aSNick Terrell */ 257*2aa14b1aSNick Terrell if (args->ip[stream] < args->iend[stream] - 8) 258*2aa14b1aSNick Terrell return ERROR(corruption_detected); 259*2aa14b1aSNick Terrell 260*2aa14b1aSNick Terrell /* Construct the BIT_DStream_t. */ 261*2aa14b1aSNick Terrell bit->bitContainer = MEM_readLE64(args->ip[stream]); 262*2aa14b1aSNick Terrell bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]); 263*2aa14b1aSNick Terrell bit->start = (const char*)args->iend[0]; 264*2aa14b1aSNick Terrell bit->limitPtr = bit->start + sizeof(size_t); 265*2aa14b1aSNick Terrell bit->ptr = (const char*)args->ip[stream]; 266*2aa14b1aSNick Terrell 267*2aa14b1aSNick Terrell return 0; 268*2aa14b1aSNick Terrell } 269*2aa14b1aSNick Terrell #endif 270*2aa14b1aSNick Terrell 271e0c1b49fSNick Terrell 272e0c1b49fSNick Terrell #ifndef HUF_FORCE_DECOMPRESS_X2 273e0c1b49fSNick Terrell 274e0c1b49fSNick Terrell /*-***************************/ 275e0c1b49fSNick Terrell /* single-symbol decoding */ 276e0c1b49fSNick Terrell /*-***************************/ 277*2aa14b1aSNick Terrell typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */ 278e0c1b49fSNick Terrell 279e0c1b49fSNick Terrell /* 280e0c1b49fSNick Terrell * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at 281e0c1b49fSNick Terrell * a time. 282e0c1b49fSNick Terrell */ 283e0c1b49fSNick Terrell static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { 284e0c1b49fSNick Terrell U64 D4; 285e0c1b49fSNick Terrell if (MEM_isLittleEndian()) { 286e0c1b49fSNick Terrell D4 = (symbol << 8) + nbBits; 287*2aa14b1aSNick Terrell } else { 288*2aa14b1aSNick Terrell D4 = symbol + (nbBits << 8); 289e0c1b49fSNick Terrell } 290e0c1b49fSNick Terrell D4 *= 0x0001000100010001ULL; 291e0c1b49fSNick Terrell return D4; 292e0c1b49fSNick Terrell } 293e0c1b49fSNick Terrell 294*2aa14b1aSNick Terrell /* 295*2aa14b1aSNick Terrell * Increase the tableLog to targetTableLog and rescales the stats. 296*2aa14b1aSNick Terrell * If tableLog > targetTableLog this is a no-op. 297*2aa14b1aSNick Terrell * @returns New tableLog 298*2aa14b1aSNick Terrell */ 299*2aa14b1aSNick Terrell static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog) 300*2aa14b1aSNick Terrell { 301*2aa14b1aSNick Terrell if (tableLog > targetTableLog) 302*2aa14b1aSNick Terrell return tableLog; 303*2aa14b1aSNick Terrell if (tableLog < targetTableLog) { 304*2aa14b1aSNick Terrell U32 const scale = targetTableLog - tableLog; 305*2aa14b1aSNick Terrell U32 s; 306*2aa14b1aSNick Terrell /* Increase the weight for all non-zero probability symbols by scale. */ 307*2aa14b1aSNick Terrell for (s = 0; s < nbSymbols; ++s) { 308*2aa14b1aSNick Terrell huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale); 309*2aa14b1aSNick Terrell } 310*2aa14b1aSNick Terrell /* Update rankVal to reflect the new weights. 311*2aa14b1aSNick Terrell * All weights except 0 get moved to weight + scale. 312*2aa14b1aSNick Terrell * Weights [1, scale] are empty. 313*2aa14b1aSNick Terrell */ 314*2aa14b1aSNick Terrell for (s = targetTableLog; s > scale; --s) { 315*2aa14b1aSNick Terrell rankVal[s] = rankVal[s - scale]; 316*2aa14b1aSNick Terrell } 317*2aa14b1aSNick Terrell for (s = scale; s > 0; --s) { 318*2aa14b1aSNick Terrell rankVal[s] = 0; 319*2aa14b1aSNick Terrell } 320*2aa14b1aSNick Terrell } 321*2aa14b1aSNick Terrell return targetTableLog; 322*2aa14b1aSNick Terrell } 323*2aa14b1aSNick Terrell 324e0c1b49fSNick Terrell typedef struct { 325e0c1b49fSNick Terrell U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; 326e0c1b49fSNick Terrell U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; 327e0c1b49fSNick Terrell U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; 328e0c1b49fSNick Terrell BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; 329e0c1b49fSNick Terrell BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; 330e0c1b49fSNick Terrell } HUF_ReadDTableX1_Workspace; 331e0c1b49fSNick Terrell 332e0c1b49fSNick Terrell 333e0c1b49fSNick Terrell size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) 334e0c1b49fSNick Terrell { 335e0c1b49fSNick Terrell return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); 336e0c1b49fSNick Terrell } 337e0c1b49fSNick Terrell 338e0c1b49fSNick Terrell size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) 339e0c1b49fSNick Terrell { 340e0c1b49fSNick Terrell U32 tableLog = 0; 341e0c1b49fSNick Terrell U32 nbSymbols = 0; 342e0c1b49fSNick Terrell size_t iSize; 343e0c1b49fSNick Terrell void* const dtPtr = DTable + 1; 344e0c1b49fSNick Terrell HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; 345e0c1b49fSNick Terrell HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; 346e0c1b49fSNick Terrell 347e0c1b49fSNick Terrell DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); 348e0c1b49fSNick Terrell if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); 349e0c1b49fSNick Terrell 350e0c1b49fSNick Terrell DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); 351e0c1b49fSNick Terrell /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ 352e0c1b49fSNick Terrell 353e0c1b49fSNick Terrell iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); 354e0c1b49fSNick Terrell if (HUF_isError(iSize)) return iSize; 355e0c1b49fSNick Terrell 356*2aa14b1aSNick Terrell 357e0c1b49fSNick Terrell /* Table header */ 358e0c1b49fSNick Terrell { DTableDesc dtd = HUF_getDTableDesc(DTable); 359*2aa14b1aSNick Terrell U32 const maxTableLog = dtd.maxTableLog + 1; 360*2aa14b1aSNick Terrell U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG); 361*2aa14b1aSNick Terrell tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog); 362e0c1b49fSNick Terrell if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ 363e0c1b49fSNick Terrell dtd.tableType = 0; 364e0c1b49fSNick Terrell dtd.tableLog = (BYTE)tableLog; 365e0c1b49fSNick Terrell ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); 366e0c1b49fSNick Terrell } 367e0c1b49fSNick Terrell 368e0c1b49fSNick Terrell /* Compute symbols and rankStart given rankVal: 369e0c1b49fSNick Terrell * 370e0c1b49fSNick Terrell * rankVal already contains the number of values of each weight. 371e0c1b49fSNick Terrell * 372e0c1b49fSNick Terrell * symbols contains the symbols ordered by weight. First are the rankVal[0] 373e0c1b49fSNick Terrell * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. 374e0c1b49fSNick Terrell * symbols[0] is filled (but unused) to avoid a branch. 375e0c1b49fSNick Terrell * 376e0c1b49fSNick Terrell * rankStart contains the offset where each rank belongs in the DTable. 377e0c1b49fSNick Terrell * rankStart[0] is not filled because there are no entries in the table for 378e0c1b49fSNick Terrell * weight 0. 379e0c1b49fSNick Terrell */ 380e0c1b49fSNick Terrell { 381e0c1b49fSNick Terrell int n; 382e0c1b49fSNick Terrell int nextRankStart = 0; 383e0c1b49fSNick Terrell int const unroll = 4; 384e0c1b49fSNick Terrell int const nLimit = (int)nbSymbols - unroll + 1; 385e0c1b49fSNick Terrell for (n=0; n<(int)tableLog+1; n++) { 386e0c1b49fSNick Terrell U32 const curr = nextRankStart; 387e0c1b49fSNick Terrell nextRankStart += wksp->rankVal[n]; 388e0c1b49fSNick Terrell wksp->rankStart[n] = curr; 389e0c1b49fSNick Terrell } 390e0c1b49fSNick Terrell for (n=0; n < nLimit; n += unroll) { 391e0c1b49fSNick Terrell int u; 392e0c1b49fSNick Terrell for (u=0; u < unroll; ++u) { 393e0c1b49fSNick Terrell size_t const w = wksp->huffWeight[n+u]; 394e0c1b49fSNick Terrell wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); 395e0c1b49fSNick Terrell } 396e0c1b49fSNick Terrell } 397e0c1b49fSNick Terrell for (; n < (int)nbSymbols; ++n) { 398e0c1b49fSNick Terrell size_t const w = wksp->huffWeight[n]; 399e0c1b49fSNick Terrell wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; 400e0c1b49fSNick Terrell } 401e0c1b49fSNick Terrell } 402e0c1b49fSNick Terrell 403e0c1b49fSNick Terrell /* fill DTable 404e0c1b49fSNick Terrell * We fill all entries of each weight in order. 405*2aa14b1aSNick Terrell * That way length is a constant for each iteration of the outer loop. 406e0c1b49fSNick Terrell * We can switch based on the length to a different inner loop which is 407e0c1b49fSNick Terrell * optimized for that particular case. 408e0c1b49fSNick Terrell */ 409e0c1b49fSNick Terrell { 410e0c1b49fSNick Terrell U32 w; 411e0c1b49fSNick Terrell int symbol=wksp->rankVal[0]; 412e0c1b49fSNick Terrell int rankStart=0; 413e0c1b49fSNick Terrell for (w=1; w<tableLog+1; ++w) { 414e0c1b49fSNick Terrell int const symbolCount = wksp->rankVal[w]; 415e0c1b49fSNick Terrell int const length = (1 << w) >> 1; 416e0c1b49fSNick Terrell int uStart = rankStart; 417e0c1b49fSNick Terrell BYTE const nbBits = (BYTE)(tableLog + 1 - w); 418e0c1b49fSNick Terrell int s; 419e0c1b49fSNick Terrell int u; 420e0c1b49fSNick Terrell switch (length) { 421e0c1b49fSNick Terrell case 1: 422e0c1b49fSNick Terrell for (s=0; s<symbolCount; ++s) { 423e0c1b49fSNick Terrell HUF_DEltX1 D; 424e0c1b49fSNick Terrell D.byte = wksp->symbols[symbol + s]; 425e0c1b49fSNick Terrell D.nbBits = nbBits; 426e0c1b49fSNick Terrell dt[uStart] = D; 427e0c1b49fSNick Terrell uStart += 1; 428e0c1b49fSNick Terrell } 429e0c1b49fSNick Terrell break; 430e0c1b49fSNick Terrell case 2: 431e0c1b49fSNick Terrell for (s=0; s<symbolCount; ++s) { 432e0c1b49fSNick Terrell HUF_DEltX1 D; 433e0c1b49fSNick Terrell D.byte = wksp->symbols[symbol + s]; 434e0c1b49fSNick Terrell D.nbBits = nbBits; 435e0c1b49fSNick Terrell dt[uStart+0] = D; 436e0c1b49fSNick Terrell dt[uStart+1] = D; 437e0c1b49fSNick Terrell uStart += 2; 438e0c1b49fSNick Terrell } 439e0c1b49fSNick Terrell break; 440e0c1b49fSNick Terrell case 4: 441e0c1b49fSNick Terrell for (s=0; s<symbolCount; ++s) { 442e0c1b49fSNick Terrell U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); 443e0c1b49fSNick Terrell MEM_write64(dt + uStart, D4); 444e0c1b49fSNick Terrell uStart += 4; 445e0c1b49fSNick Terrell } 446e0c1b49fSNick Terrell break; 447e0c1b49fSNick Terrell case 8: 448e0c1b49fSNick Terrell for (s=0; s<symbolCount; ++s) { 449e0c1b49fSNick Terrell U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); 450e0c1b49fSNick Terrell MEM_write64(dt + uStart, D4); 451e0c1b49fSNick Terrell MEM_write64(dt + uStart + 4, D4); 452e0c1b49fSNick Terrell uStart += 8; 453e0c1b49fSNick Terrell } 454e0c1b49fSNick Terrell break; 455e0c1b49fSNick Terrell default: 456e0c1b49fSNick Terrell for (s=0; s<symbolCount; ++s) { 457e0c1b49fSNick Terrell U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); 458e0c1b49fSNick Terrell for (u=0; u < length; u += 16) { 459e0c1b49fSNick Terrell MEM_write64(dt + uStart + u + 0, D4); 460e0c1b49fSNick Terrell MEM_write64(dt + uStart + u + 4, D4); 461e0c1b49fSNick Terrell MEM_write64(dt + uStart + u + 8, D4); 462e0c1b49fSNick Terrell MEM_write64(dt + uStart + u + 12, D4); 463e0c1b49fSNick Terrell } 464e0c1b49fSNick Terrell assert(u == length); 465e0c1b49fSNick Terrell uStart += length; 466e0c1b49fSNick Terrell } 467e0c1b49fSNick Terrell break; 468e0c1b49fSNick Terrell } 469e0c1b49fSNick Terrell symbol += symbolCount; 470e0c1b49fSNick Terrell rankStart += symbolCount * length; 471e0c1b49fSNick Terrell } 472e0c1b49fSNick Terrell } 473e0c1b49fSNick Terrell return iSize; 474e0c1b49fSNick Terrell } 475e0c1b49fSNick Terrell 476e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE BYTE 477e0c1b49fSNick Terrell HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) 478e0c1b49fSNick Terrell { 479e0c1b49fSNick Terrell size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ 480e0c1b49fSNick Terrell BYTE const c = dt[val].byte; 481e0c1b49fSNick Terrell BIT_skipBits(Dstream, dt[val].nbBits); 482e0c1b49fSNick Terrell return c; 483e0c1b49fSNick Terrell } 484e0c1b49fSNick Terrell 485e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ 486e0c1b49fSNick Terrell *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) 487e0c1b49fSNick Terrell 488e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ 489e0c1b49fSNick Terrell if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 490e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) 491e0c1b49fSNick Terrell 492e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ 493e0c1b49fSNick Terrell if (MEM_64bits()) \ 494e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) 495e0c1b49fSNick Terrell 496e0c1b49fSNick Terrell HINT_INLINE size_t 497e0c1b49fSNick Terrell HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) 498e0c1b49fSNick Terrell { 499e0c1b49fSNick Terrell BYTE* const pStart = p; 500e0c1b49fSNick Terrell 501e0c1b49fSNick Terrell /* up to 4 symbols at a time */ 502*2aa14b1aSNick Terrell if ((pEnd - p) > 3) { 503e0c1b49fSNick Terrell while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { 504e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(p, bitDPtr); 505e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_1(p, bitDPtr); 506e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(p, bitDPtr); 507e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(p, bitDPtr); 508e0c1b49fSNick Terrell } 509*2aa14b1aSNick Terrell } else { 510*2aa14b1aSNick Terrell BIT_reloadDStream(bitDPtr); 511*2aa14b1aSNick Terrell } 512e0c1b49fSNick Terrell 513e0c1b49fSNick Terrell /* [0-3] symbols remaining */ 514e0c1b49fSNick Terrell if (MEM_32bits()) 515e0c1b49fSNick Terrell while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) 516e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(p, bitDPtr); 517e0c1b49fSNick Terrell 518e0c1b49fSNick Terrell /* no more data to retrieve from bitstream, no need to reload */ 519e0c1b49fSNick Terrell while (p < pEnd) 520e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(p, bitDPtr); 521e0c1b49fSNick Terrell 522e0c1b49fSNick Terrell return pEnd-pStart; 523e0c1b49fSNick Terrell } 524e0c1b49fSNick Terrell 525e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t 526e0c1b49fSNick Terrell HUF_decompress1X1_usingDTable_internal_body( 527e0c1b49fSNick Terrell void* dst, size_t dstSize, 528e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 529e0c1b49fSNick Terrell const HUF_DTable* DTable) 530e0c1b49fSNick Terrell { 531e0c1b49fSNick Terrell BYTE* op = (BYTE*)dst; 532e0c1b49fSNick Terrell BYTE* const oend = op + dstSize; 533e0c1b49fSNick Terrell const void* dtPtr = DTable + 1; 534e0c1b49fSNick Terrell const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; 535e0c1b49fSNick Terrell BIT_DStream_t bitD; 536e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable); 537e0c1b49fSNick Terrell U32 const dtLog = dtd.tableLog; 538e0c1b49fSNick Terrell 539e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); 540e0c1b49fSNick Terrell 541e0c1b49fSNick Terrell HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); 542e0c1b49fSNick Terrell 543e0c1b49fSNick Terrell if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); 544e0c1b49fSNick Terrell 545e0c1b49fSNick Terrell return dstSize; 546e0c1b49fSNick Terrell } 547e0c1b49fSNick Terrell 548e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t 549e0c1b49fSNick Terrell HUF_decompress4X1_usingDTable_internal_body( 550e0c1b49fSNick Terrell void* dst, size_t dstSize, 551e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 552e0c1b49fSNick Terrell const HUF_DTable* DTable) 553e0c1b49fSNick Terrell { 554e0c1b49fSNick Terrell /* Check */ 555e0c1b49fSNick Terrell if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ 556e0c1b49fSNick Terrell 557e0c1b49fSNick Terrell { const BYTE* const istart = (const BYTE*) cSrc; 558e0c1b49fSNick Terrell BYTE* const ostart = (BYTE*) dst; 559e0c1b49fSNick Terrell BYTE* const oend = ostart + dstSize; 560e0c1b49fSNick Terrell BYTE* const olimit = oend - 3; 561e0c1b49fSNick Terrell const void* const dtPtr = DTable + 1; 562e0c1b49fSNick Terrell const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; 563e0c1b49fSNick Terrell 564e0c1b49fSNick Terrell /* Init */ 565e0c1b49fSNick Terrell BIT_DStream_t bitD1; 566e0c1b49fSNick Terrell BIT_DStream_t bitD2; 567e0c1b49fSNick Terrell BIT_DStream_t bitD3; 568e0c1b49fSNick Terrell BIT_DStream_t bitD4; 569e0c1b49fSNick Terrell size_t const length1 = MEM_readLE16(istart); 570e0c1b49fSNick Terrell size_t const length2 = MEM_readLE16(istart+2); 571e0c1b49fSNick Terrell size_t const length3 = MEM_readLE16(istart+4); 572e0c1b49fSNick Terrell size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); 573e0c1b49fSNick Terrell const BYTE* const istart1 = istart + 6; /* jumpTable */ 574e0c1b49fSNick Terrell const BYTE* const istart2 = istart1 + length1; 575e0c1b49fSNick Terrell const BYTE* const istart3 = istart2 + length2; 576e0c1b49fSNick Terrell const BYTE* const istart4 = istart3 + length3; 577e0c1b49fSNick Terrell const size_t segmentSize = (dstSize+3) / 4; 578e0c1b49fSNick Terrell BYTE* const opStart2 = ostart + segmentSize; 579e0c1b49fSNick Terrell BYTE* const opStart3 = opStart2 + segmentSize; 580e0c1b49fSNick Terrell BYTE* const opStart4 = opStart3 + segmentSize; 581e0c1b49fSNick Terrell BYTE* op1 = ostart; 582e0c1b49fSNick Terrell BYTE* op2 = opStart2; 583e0c1b49fSNick Terrell BYTE* op3 = opStart3; 584e0c1b49fSNick Terrell BYTE* op4 = opStart4; 585e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable); 586e0c1b49fSNick Terrell U32 const dtLog = dtd.tableLog; 587e0c1b49fSNick Terrell U32 endSignal = 1; 588e0c1b49fSNick Terrell 589e0c1b49fSNick Terrell if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ 590*2aa14b1aSNick Terrell if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ 591e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); 592e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); 593e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); 594e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); 595e0c1b49fSNick Terrell 596e0c1b49fSNick Terrell /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ 597*2aa14b1aSNick Terrell if ((size_t)(oend - op4) >= sizeof(size_t)) { 598e0c1b49fSNick Terrell for ( ; (endSignal) & (op4 < olimit) ; ) { 599e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op1, &bitD1); 600e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op2, &bitD2); 601e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op3, &bitD3); 602e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op4, &bitD4); 603e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_1(op1, &bitD1); 604e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_1(op2, &bitD2); 605e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_1(op3, &bitD3); 606e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_1(op4, &bitD4); 607e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op1, &bitD1); 608e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op2, &bitD2); 609e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op3, &bitD3); 610e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op4, &bitD4); 611e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(op1, &bitD1); 612e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(op2, &bitD2); 613e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(op3, &bitD3); 614e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(op4, &bitD4); 615e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; 616e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; 617e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; 618e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; 619e0c1b49fSNick Terrell } 620*2aa14b1aSNick Terrell } 621e0c1b49fSNick Terrell 622e0c1b49fSNick Terrell /* check corruption */ 623e0c1b49fSNick Terrell /* note : should not be necessary : op# advance in lock step, and we control op4. 624e0c1b49fSNick Terrell * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ 625e0c1b49fSNick Terrell if (op1 > opStart2) return ERROR(corruption_detected); 626e0c1b49fSNick Terrell if (op2 > opStart3) return ERROR(corruption_detected); 627e0c1b49fSNick Terrell if (op3 > opStart4) return ERROR(corruption_detected); 628e0c1b49fSNick Terrell /* note : op4 supposed already verified within main loop */ 629e0c1b49fSNick Terrell 630e0c1b49fSNick Terrell /* finish bitStreams one by one */ 631e0c1b49fSNick Terrell HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); 632e0c1b49fSNick Terrell HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); 633e0c1b49fSNick Terrell HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); 634e0c1b49fSNick Terrell HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); 635e0c1b49fSNick Terrell 636e0c1b49fSNick Terrell /* check */ 637e0c1b49fSNick Terrell { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); 638e0c1b49fSNick Terrell if (!endCheck) return ERROR(corruption_detected); } 639e0c1b49fSNick Terrell 640e0c1b49fSNick Terrell /* decoded size */ 641e0c1b49fSNick Terrell return dstSize; 642e0c1b49fSNick Terrell } 643e0c1b49fSNick Terrell } 644e0c1b49fSNick Terrell 645*2aa14b1aSNick Terrell #if HUF_NEED_BMI2_FUNCTION 646*2aa14b1aSNick Terrell static BMI2_TARGET_ATTRIBUTE 647*2aa14b1aSNick Terrell size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc, 648*2aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable) { 649*2aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); 650*2aa14b1aSNick Terrell } 651*2aa14b1aSNick Terrell #endif 652*2aa14b1aSNick Terrell 653*2aa14b1aSNick Terrell #if HUF_NEED_DEFAULT_FUNCTION 654*2aa14b1aSNick Terrell static 655*2aa14b1aSNick Terrell size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, 656*2aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable) { 657*2aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); 658*2aa14b1aSNick Terrell } 659*2aa14b1aSNick Terrell #endif 660*2aa14b1aSNick Terrell 661*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 662*2aa14b1aSNick Terrell 663*2aa14b1aSNick Terrell HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN; 664*2aa14b1aSNick Terrell 665*2aa14b1aSNick Terrell static HUF_ASM_X86_64_BMI2_ATTRS 666*2aa14b1aSNick Terrell size_t 667*2aa14b1aSNick Terrell HUF_decompress4X1_usingDTable_internal_bmi2_asm( 668*2aa14b1aSNick Terrell void* dst, size_t dstSize, 669*2aa14b1aSNick Terrell const void* cSrc, size_t cSrcSize, 670*2aa14b1aSNick Terrell const HUF_DTable* DTable) 671*2aa14b1aSNick Terrell { 672*2aa14b1aSNick Terrell void const* dt = DTable + 1; 673*2aa14b1aSNick Terrell const BYTE* const iend = (const BYTE*)cSrc + 6; 674*2aa14b1aSNick Terrell BYTE* const oend = (BYTE*)dst + dstSize; 675*2aa14b1aSNick Terrell HUF_DecompressAsmArgs args; 676*2aa14b1aSNick Terrell { 677*2aa14b1aSNick Terrell size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); 678*2aa14b1aSNick Terrell FORWARD_IF_ERROR(ret, "Failed to init asm args"); 679*2aa14b1aSNick Terrell if (ret != 0) 680*2aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); 681*2aa14b1aSNick Terrell } 682*2aa14b1aSNick Terrell 683*2aa14b1aSNick Terrell assert(args.ip[0] >= args.ilimit); 684*2aa14b1aSNick Terrell HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args); 685*2aa14b1aSNick Terrell 686*2aa14b1aSNick Terrell /* Our loop guarantees that ip[] >= ilimit and that we haven't 687*2aa14b1aSNick Terrell * overwritten any op[]. 688*2aa14b1aSNick Terrell */ 689*2aa14b1aSNick Terrell assert(args.ip[0] >= iend); 690*2aa14b1aSNick Terrell assert(args.ip[1] >= iend); 691*2aa14b1aSNick Terrell assert(args.ip[2] >= iend); 692*2aa14b1aSNick Terrell assert(args.ip[3] >= iend); 693*2aa14b1aSNick Terrell assert(args.op[3] <= oend); 694*2aa14b1aSNick Terrell (void)iend; 695*2aa14b1aSNick Terrell 696*2aa14b1aSNick Terrell /* finish bit streams one by one. */ 697*2aa14b1aSNick Terrell { 698*2aa14b1aSNick Terrell size_t const segmentSize = (dstSize+3) / 4; 699*2aa14b1aSNick Terrell BYTE* segmentEnd = (BYTE*)dst; 700*2aa14b1aSNick Terrell int i; 701*2aa14b1aSNick Terrell for (i = 0; i < 4; ++i) { 702*2aa14b1aSNick Terrell BIT_DStream_t bit; 703*2aa14b1aSNick Terrell if (segmentSize <= (size_t)(oend - segmentEnd)) 704*2aa14b1aSNick Terrell segmentEnd += segmentSize; 705*2aa14b1aSNick Terrell else 706*2aa14b1aSNick Terrell segmentEnd = oend; 707*2aa14b1aSNick Terrell FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption"); 708*2aa14b1aSNick Terrell /* Decompress and validate that we've produced exactly the expected length. */ 709*2aa14b1aSNick Terrell args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG); 710*2aa14b1aSNick Terrell if (args.op[i] != segmentEnd) return ERROR(corruption_detected); 711*2aa14b1aSNick Terrell } 712*2aa14b1aSNick Terrell } 713*2aa14b1aSNick Terrell 714*2aa14b1aSNick Terrell /* decoded size */ 715*2aa14b1aSNick Terrell return dstSize; 716*2aa14b1aSNick Terrell } 717*2aa14b1aSNick Terrell #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ 718e0c1b49fSNick Terrell 719e0c1b49fSNick Terrell typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, 720e0c1b49fSNick Terrell const void *cSrc, 721e0c1b49fSNick Terrell size_t cSrcSize, 722e0c1b49fSNick Terrell const HUF_DTable *DTable); 723e0c1b49fSNick Terrell 724e0c1b49fSNick Terrell HUF_DGEN(HUF_decompress1X1_usingDTable_internal) 725e0c1b49fSNick Terrell 726*2aa14b1aSNick Terrell static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, 727*2aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable, int bmi2) 728*2aa14b1aSNick Terrell { 729*2aa14b1aSNick Terrell #if DYNAMIC_BMI2 730*2aa14b1aSNick Terrell if (bmi2) { 731*2aa14b1aSNick Terrell # if ZSTD_ENABLE_ASM_X86_64_BMI2 732*2aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); 733*2aa14b1aSNick Terrell # else 734*2aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); 735*2aa14b1aSNick Terrell # endif 736*2aa14b1aSNick Terrell } 737*2aa14b1aSNick Terrell #else 738*2aa14b1aSNick Terrell (void)bmi2; 739*2aa14b1aSNick Terrell #endif 740*2aa14b1aSNick Terrell 741*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) 742*2aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); 743*2aa14b1aSNick Terrell #else 744*2aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); 745*2aa14b1aSNick Terrell #endif 746*2aa14b1aSNick Terrell } 747e0c1b49fSNick Terrell 748e0c1b49fSNick Terrell 749e0c1b49fSNick Terrell size_t HUF_decompress1X1_usingDTable( 750e0c1b49fSNick Terrell void* dst, size_t dstSize, 751e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 752e0c1b49fSNick Terrell const HUF_DTable* DTable) 753e0c1b49fSNick Terrell { 754e0c1b49fSNick Terrell DTableDesc dtd = HUF_getDTableDesc(DTable); 755e0c1b49fSNick Terrell if (dtd.tableType != 0) return ERROR(GENERIC); 756e0c1b49fSNick Terrell return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 757e0c1b49fSNick Terrell } 758e0c1b49fSNick Terrell 759e0c1b49fSNick Terrell size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, 760e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 761e0c1b49fSNick Terrell void* workSpace, size_t wkspSize) 762e0c1b49fSNick Terrell { 763e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*) cSrc; 764e0c1b49fSNick Terrell 765e0c1b49fSNick Terrell size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); 766e0c1b49fSNick Terrell if (HUF_isError(hSize)) return hSize; 767e0c1b49fSNick Terrell if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 768e0c1b49fSNick Terrell ip += hSize; cSrcSize -= hSize; 769e0c1b49fSNick Terrell 770e0c1b49fSNick Terrell return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); 771e0c1b49fSNick Terrell } 772e0c1b49fSNick Terrell 773e0c1b49fSNick Terrell 774e0c1b49fSNick Terrell size_t HUF_decompress4X1_usingDTable( 775e0c1b49fSNick Terrell void* dst, size_t dstSize, 776e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 777e0c1b49fSNick Terrell const HUF_DTable* DTable) 778e0c1b49fSNick Terrell { 779e0c1b49fSNick Terrell DTableDesc dtd = HUF_getDTableDesc(DTable); 780e0c1b49fSNick Terrell if (dtd.tableType != 0) return ERROR(GENERIC); 781e0c1b49fSNick Terrell return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 782e0c1b49fSNick Terrell } 783e0c1b49fSNick Terrell 784e0c1b49fSNick Terrell static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, 785e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 786e0c1b49fSNick Terrell void* workSpace, size_t wkspSize, int bmi2) 787e0c1b49fSNick Terrell { 788e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*) cSrc; 789e0c1b49fSNick Terrell 790e0c1b49fSNick Terrell size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 791e0c1b49fSNick Terrell if (HUF_isError(hSize)) return hSize; 792e0c1b49fSNick Terrell if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 793e0c1b49fSNick Terrell ip += hSize; cSrcSize -= hSize; 794e0c1b49fSNick Terrell 795e0c1b49fSNick Terrell return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 796e0c1b49fSNick Terrell } 797e0c1b49fSNick Terrell 798e0c1b49fSNick Terrell size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 799e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 800e0c1b49fSNick Terrell void* workSpace, size_t wkspSize) 801e0c1b49fSNick Terrell { 802e0c1b49fSNick Terrell return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); 803e0c1b49fSNick Terrell } 804e0c1b49fSNick Terrell 805e0c1b49fSNick Terrell 806e0c1b49fSNick Terrell #endif /* HUF_FORCE_DECOMPRESS_X2 */ 807e0c1b49fSNick Terrell 808e0c1b49fSNick Terrell 809e0c1b49fSNick Terrell #ifndef HUF_FORCE_DECOMPRESS_X1 810e0c1b49fSNick Terrell 811e0c1b49fSNick Terrell /* *************************/ 812e0c1b49fSNick Terrell /* double-symbols decoding */ 813e0c1b49fSNick Terrell /* *************************/ 814e0c1b49fSNick Terrell 815e0c1b49fSNick Terrell typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ 816*2aa14b1aSNick Terrell typedef struct { BYTE symbol; } sortedSymbol_t; 817e0c1b49fSNick Terrell typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; 818e0c1b49fSNick Terrell typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; 819e0c1b49fSNick Terrell 820*2aa14b1aSNick Terrell /* 821*2aa14b1aSNick Terrell * Constructs a HUF_DEltX2 in a U32. 822*2aa14b1aSNick Terrell */ 823*2aa14b1aSNick Terrell static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level) 824*2aa14b1aSNick Terrell { 825*2aa14b1aSNick Terrell U32 seq; 826*2aa14b1aSNick Terrell DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0); 827*2aa14b1aSNick Terrell DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2); 828*2aa14b1aSNick Terrell DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3); 829*2aa14b1aSNick Terrell DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32)); 830*2aa14b1aSNick Terrell if (MEM_isLittleEndian()) { 831*2aa14b1aSNick Terrell seq = level == 1 ? symbol : (baseSeq + (symbol << 8)); 832*2aa14b1aSNick Terrell return seq + (nbBits << 16) + ((U32)level << 24); 833*2aa14b1aSNick Terrell } else { 834*2aa14b1aSNick Terrell seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol); 835*2aa14b1aSNick Terrell return (seq << 16) + (nbBits << 8) + (U32)level; 836*2aa14b1aSNick Terrell } 837*2aa14b1aSNick Terrell } 838*2aa14b1aSNick Terrell 839*2aa14b1aSNick Terrell /* 840*2aa14b1aSNick Terrell * Constructs a HUF_DEltX2. 841*2aa14b1aSNick Terrell */ 842*2aa14b1aSNick Terrell static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level) 843*2aa14b1aSNick Terrell { 844*2aa14b1aSNick Terrell HUF_DEltX2 DElt; 845*2aa14b1aSNick Terrell U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level); 846*2aa14b1aSNick Terrell DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val)); 847*2aa14b1aSNick Terrell ZSTD_memcpy(&DElt, &val, sizeof(val)); 848*2aa14b1aSNick Terrell return DElt; 849*2aa14b1aSNick Terrell } 850*2aa14b1aSNick Terrell 851*2aa14b1aSNick Terrell /* 852*2aa14b1aSNick Terrell * Constructs 2 HUF_DEltX2s and packs them into a U64. 853*2aa14b1aSNick Terrell */ 854*2aa14b1aSNick Terrell static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level) 855*2aa14b1aSNick Terrell { 856*2aa14b1aSNick Terrell U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level); 857*2aa14b1aSNick Terrell return (U64)DElt + ((U64)DElt << 32); 858*2aa14b1aSNick Terrell } 859*2aa14b1aSNick Terrell 860*2aa14b1aSNick Terrell /* 861*2aa14b1aSNick Terrell * Fills the DTable rank with all the symbols from [begin, end) that are each 862*2aa14b1aSNick Terrell * nbBits long. 863*2aa14b1aSNick Terrell * 864*2aa14b1aSNick Terrell * @param DTableRank The start of the rank in the DTable. 865*2aa14b1aSNick Terrell * @param begin The first symbol to fill (inclusive). 866*2aa14b1aSNick Terrell * @param end The last symbol to fill (exclusive). 867*2aa14b1aSNick Terrell * @param nbBits Each symbol is nbBits long. 868*2aa14b1aSNick Terrell * @param tableLog The table log. 869*2aa14b1aSNick Terrell * @param baseSeq If level == 1 { 0 } else { the first level symbol } 870*2aa14b1aSNick Terrell * @param level The level in the table. Must be 1 or 2. 871*2aa14b1aSNick Terrell */ 872*2aa14b1aSNick Terrell static void HUF_fillDTableX2ForWeight( 873*2aa14b1aSNick Terrell HUF_DEltX2* DTableRank, 874*2aa14b1aSNick Terrell sortedSymbol_t const* begin, sortedSymbol_t const* end, 875*2aa14b1aSNick Terrell U32 nbBits, U32 tableLog, 876*2aa14b1aSNick Terrell U16 baseSeq, int const level) 877*2aa14b1aSNick Terrell { 878*2aa14b1aSNick Terrell U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */); 879*2aa14b1aSNick Terrell const sortedSymbol_t* ptr; 880*2aa14b1aSNick Terrell assert(level >= 1 && level <= 2); 881*2aa14b1aSNick Terrell switch (length) { 882*2aa14b1aSNick Terrell case 1: 883*2aa14b1aSNick Terrell for (ptr = begin; ptr != end; ++ptr) { 884*2aa14b1aSNick Terrell HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level); 885*2aa14b1aSNick Terrell *DTableRank++ = DElt; 886*2aa14b1aSNick Terrell } 887*2aa14b1aSNick Terrell break; 888*2aa14b1aSNick Terrell case 2: 889*2aa14b1aSNick Terrell for (ptr = begin; ptr != end; ++ptr) { 890*2aa14b1aSNick Terrell HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level); 891*2aa14b1aSNick Terrell DTableRank[0] = DElt; 892*2aa14b1aSNick Terrell DTableRank[1] = DElt; 893*2aa14b1aSNick Terrell DTableRank += 2; 894*2aa14b1aSNick Terrell } 895*2aa14b1aSNick Terrell break; 896*2aa14b1aSNick Terrell case 4: 897*2aa14b1aSNick Terrell for (ptr = begin; ptr != end; ++ptr) { 898*2aa14b1aSNick Terrell U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); 899*2aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); 900*2aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); 901*2aa14b1aSNick Terrell DTableRank += 4; 902*2aa14b1aSNick Terrell } 903*2aa14b1aSNick Terrell break; 904*2aa14b1aSNick Terrell case 8: 905*2aa14b1aSNick Terrell for (ptr = begin; ptr != end; ++ptr) { 906*2aa14b1aSNick Terrell U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); 907*2aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); 908*2aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); 909*2aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2)); 910*2aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2)); 911*2aa14b1aSNick Terrell DTableRank += 8; 912*2aa14b1aSNick Terrell } 913*2aa14b1aSNick Terrell break; 914*2aa14b1aSNick Terrell default: 915*2aa14b1aSNick Terrell for (ptr = begin; ptr != end; ++ptr) { 916*2aa14b1aSNick Terrell U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); 917*2aa14b1aSNick Terrell HUF_DEltX2* const DTableRankEnd = DTableRank + length; 918*2aa14b1aSNick Terrell for (; DTableRank != DTableRankEnd; DTableRank += 8) { 919*2aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); 920*2aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); 921*2aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2)); 922*2aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2)); 923*2aa14b1aSNick Terrell } 924*2aa14b1aSNick Terrell } 925*2aa14b1aSNick Terrell break; 926*2aa14b1aSNick Terrell } 927*2aa14b1aSNick Terrell } 928e0c1b49fSNick Terrell 929e0c1b49fSNick Terrell /* HUF_fillDTableX2Level2() : 930e0c1b49fSNick Terrell * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ 931*2aa14b1aSNick Terrell static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits, 932*2aa14b1aSNick Terrell const U32* rankVal, const int minWeight, const int maxWeight1, 933*2aa14b1aSNick Terrell const sortedSymbol_t* sortedSymbols, U32 const* rankStart, 934*2aa14b1aSNick Terrell U32 nbBitsBaseline, U16 baseSeq) 935e0c1b49fSNick Terrell { 936*2aa14b1aSNick Terrell /* Fill skipped values (all positions up to rankVal[minWeight]). 937*2aa14b1aSNick Terrell * These are positions only get a single symbol because the combined weight 938*2aa14b1aSNick Terrell * is too large. 939*2aa14b1aSNick Terrell */ 940e0c1b49fSNick Terrell if (minWeight>1) { 941*2aa14b1aSNick Terrell U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */); 942*2aa14b1aSNick Terrell U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1); 943*2aa14b1aSNick Terrell int const skipSize = rankVal[minWeight]; 944*2aa14b1aSNick Terrell assert(length > 1); 945*2aa14b1aSNick Terrell assert((U32)skipSize < length); 946*2aa14b1aSNick Terrell switch (length) { 947*2aa14b1aSNick Terrell case 2: 948*2aa14b1aSNick Terrell assert(skipSize == 1); 949*2aa14b1aSNick Terrell ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2)); 950*2aa14b1aSNick Terrell break; 951*2aa14b1aSNick Terrell case 4: 952*2aa14b1aSNick Terrell assert(skipSize <= 4); 953*2aa14b1aSNick Terrell ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2)); 954*2aa14b1aSNick Terrell ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2)); 955*2aa14b1aSNick Terrell break; 956*2aa14b1aSNick Terrell default: 957*2aa14b1aSNick Terrell { 958*2aa14b1aSNick Terrell int i; 959*2aa14b1aSNick Terrell for (i = 0; i < skipSize; i += 8) { 960*2aa14b1aSNick Terrell ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2)); 961*2aa14b1aSNick Terrell ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2)); 962*2aa14b1aSNick Terrell ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2)); 963*2aa14b1aSNick Terrell ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2)); 964*2aa14b1aSNick Terrell } 965*2aa14b1aSNick Terrell } 966*2aa14b1aSNick Terrell } 967e0c1b49fSNick Terrell } 968e0c1b49fSNick Terrell 969*2aa14b1aSNick Terrell /* Fill each of the second level symbols by weight. */ 970*2aa14b1aSNick Terrell { 971*2aa14b1aSNick Terrell int w; 972*2aa14b1aSNick Terrell for (w = minWeight; w < maxWeight1; ++w) { 973*2aa14b1aSNick Terrell int const begin = rankStart[w]; 974*2aa14b1aSNick Terrell int const end = rankStart[w+1]; 975*2aa14b1aSNick Terrell U32 const nbBits = nbBitsBaseline - w; 976*2aa14b1aSNick Terrell U32 const totalBits = nbBits + consumedBits; 977*2aa14b1aSNick Terrell HUF_fillDTableX2ForWeight( 978*2aa14b1aSNick Terrell DTable + rankVal[w], 979*2aa14b1aSNick Terrell sortedSymbols + begin, sortedSymbols + end, 980*2aa14b1aSNick Terrell totalBits, targetLog, 981*2aa14b1aSNick Terrell baseSeq, /* level */ 2); 982e0c1b49fSNick Terrell } 983*2aa14b1aSNick Terrell } 984*2aa14b1aSNick Terrell } 985e0c1b49fSNick Terrell 986e0c1b49fSNick Terrell static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, 987*2aa14b1aSNick Terrell const sortedSymbol_t* sortedList, 988e0c1b49fSNick Terrell const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, 989*2aa14b1aSNick Terrell const U32 nbBitsBaseline) 990e0c1b49fSNick Terrell { 991*2aa14b1aSNick Terrell U32* const rankVal = rankValOrigin[0]; 992e0c1b49fSNick Terrell const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ 993e0c1b49fSNick Terrell const U32 minBits = nbBitsBaseline - maxWeight; 994*2aa14b1aSNick Terrell int w; 995*2aa14b1aSNick Terrell int const wEnd = (int)maxWeight + 1; 996e0c1b49fSNick Terrell 997*2aa14b1aSNick Terrell /* Fill DTable in order of weight. */ 998*2aa14b1aSNick Terrell for (w = 1; w < wEnd; ++w) { 999*2aa14b1aSNick Terrell int const begin = (int)rankStart[w]; 1000*2aa14b1aSNick Terrell int const end = (int)rankStart[w+1]; 1001*2aa14b1aSNick Terrell U32 const nbBits = nbBitsBaseline - w; 1002e0c1b49fSNick Terrell 1003*2aa14b1aSNick Terrell if (targetLog-nbBits >= minBits) { 1004*2aa14b1aSNick Terrell /* Enough room for a second symbol. */ 1005*2aa14b1aSNick Terrell int start = rankVal[w]; 1006*2aa14b1aSNick Terrell U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */); 1007e0c1b49fSNick Terrell int minWeight = nbBits + scaleLog; 1008*2aa14b1aSNick Terrell int s; 1009e0c1b49fSNick Terrell if (minWeight < 1) minWeight = 1; 1010*2aa14b1aSNick Terrell /* Fill the DTable for every symbol of weight w. 1011*2aa14b1aSNick Terrell * These symbols get at least 1 second symbol. 1012*2aa14b1aSNick Terrell */ 1013*2aa14b1aSNick Terrell for (s = begin; s != end; ++s) { 1014*2aa14b1aSNick Terrell HUF_fillDTableX2Level2( 1015*2aa14b1aSNick Terrell DTable + start, targetLog, nbBits, 1016*2aa14b1aSNick Terrell rankValOrigin[nbBits], minWeight, wEnd, 1017*2aa14b1aSNick Terrell sortedList, rankStart, 1018*2aa14b1aSNick Terrell nbBitsBaseline, sortedList[s].symbol); 1019*2aa14b1aSNick Terrell start += length; 1020*2aa14b1aSNick Terrell } 1021e0c1b49fSNick Terrell } else { 1022*2aa14b1aSNick Terrell /* Only a single symbol. */ 1023*2aa14b1aSNick Terrell HUF_fillDTableX2ForWeight( 1024*2aa14b1aSNick Terrell DTable + rankVal[w], 1025*2aa14b1aSNick Terrell sortedList + begin, sortedList + end, 1026*2aa14b1aSNick Terrell nbBits, targetLog, 1027*2aa14b1aSNick Terrell /* baseSeq */ 0, /* level */ 1); 1028*2aa14b1aSNick Terrell } 1029e0c1b49fSNick Terrell } 1030e0c1b49fSNick Terrell } 1031e0c1b49fSNick Terrell 1032e0c1b49fSNick Terrell typedef struct { 1033e0c1b49fSNick Terrell rankValCol_t rankVal[HUF_TABLELOG_MAX]; 1034e0c1b49fSNick Terrell U32 rankStats[HUF_TABLELOG_MAX + 1]; 1035*2aa14b1aSNick Terrell U32 rankStart0[HUF_TABLELOG_MAX + 3]; 1036e0c1b49fSNick Terrell sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; 1037e0c1b49fSNick Terrell BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; 1038e0c1b49fSNick Terrell U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; 1039e0c1b49fSNick Terrell } HUF_ReadDTableX2_Workspace; 1040e0c1b49fSNick Terrell 1041e0c1b49fSNick Terrell size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, 1042e0c1b49fSNick Terrell const void* src, size_t srcSize, 1043e0c1b49fSNick Terrell void* workSpace, size_t wkspSize) 1044e0c1b49fSNick Terrell { 1045*2aa14b1aSNick Terrell return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); 1046*2aa14b1aSNick Terrell } 1047*2aa14b1aSNick Terrell 1048*2aa14b1aSNick Terrell size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, 1049*2aa14b1aSNick Terrell const void* src, size_t srcSize, 1050*2aa14b1aSNick Terrell void* workSpace, size_t wkspSize, int bmi2) 1051*2aa14b1aSNick Terrell { 1052*2aa14b1aSNick Terrell U32 tableLog, maxW, nbSymbols; 1053e0c1b49fSNick Terrell DTableDesc dtd = HUF_getDTableDesc(DTable); 1054*2aa14b1aSNick Terrell U32 maxTableLog = dtd.maxTableLog; 1055e0c1b49fSNick Terrell size_t iSize; 1056e0c1b49fSNick Terrell void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ 1057e0c1b49fSNick Terrell HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; 1058e0c1b49fSNick Terrell U32 *rankStart; 1059e0c1b49fSNick Terrell 1060e0c1b49fSNick Terrell HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace; 1061e0c1b49fSNick Terrell 1062e0c1b49fSNick Terrell if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC); 1063e0c1b49fSNick Terrell 1064e0c1b49fSNick Terrell rankStart = wksp->rankStart0 + 1; 1065e0c1b49fSNick Terrell ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats)); 1066e0c1b49fSNick Terrell ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0)); 1067e0c1b49fSNick Terrell 1068e0c1b49fSNick Terrell DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ 1069e0c1b49fSNick Terrell if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); 1070e0c1b49fSNick Terrell /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ 1071e0c1b49fSNick Terrell 1072*2aa14b1aSNick Terrell iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2); 1073e0c1b49fSNick Terrell if (HUF_isError(iSize)) return iSize; 1074e0c1b49fSNick Terrell 1075e0c1b49fSNick Terrell /* check result */ 1076e0c1b49fSNick Terrell if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ 1077*2aa14b1aSNick Terrell if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG; 1078e0c1b49fSNick Terrell 1079e0c1b49fSNick Terrell /* find maxWeight */ 1080e0c1b49fSNick Terrell for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ 1081e0c1b49fSNick Terrell 1082e0c1b49fSNick Terrell /* Get start index of each weight */ 1083e0c1b49fSNick Terrell { U32 w, nextRankStart = 0; 1084e0c1b49fSNick Terrell for (w=1; w<maxW+1; w++) { 1085e0c1b49fSNick Terrell U32 curr = nextRankStart; 1086e0c1b49fSNick Terrell nextRankStart += wksp->rankStats[w]; 1087e0c1b49fSNick Terrell rankStart[w] = curr; 1088e0c1b49fSNick Terrell } 1089e0c1b49fSNick Terrell rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ 1090*2aa14b1aSNick Terrell rankStart[maxW+1] = nextRankStart; 1091e0c1b49fSNick Terrell } 1092e0c1b49fSNick Terrell 1093e0c1b49fSNick Terrell /* sort symbols by weight */ 1094e0c1b49fSNick Terrell { U32 s; 1095e0c1b49fSNick Terrell for (s=0; s<nbSymbols; s++) { 1096e0c1b49fSNick Terrell U32 const w = wksp->weightList[s]; 1097e0c1b49fSNick Terrell U32 const r = rankStart[w]++; 1098e0c1b49fSNick Terrell wksp->sortedSymbol[r].symbol = (BYTE)s; 1099e0c1b49fSNick Terrell } 1100e0c1b49fSNick Terrell rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ 1101e0c1b49fSNick Terrell } 1102e0c1b49fSNick Terrell 1103e0c1b49fSNick Terrell /* Build rankVal */ 1104e0c1b49fSNick Terrell { U32* const rankVal0 = wksp->rankVal[0]; 1105e0c1b49fSNick Terrell { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */ 1106e0c1b49fSNick Terrell U32 nextRankVal = 0; 1107e0c1b49fSNick Terrell U32 w; 1108e0c1b49fSNick Terrell for (w=1; w<maxW+1; w++) { 1109e0c1b49fSNick Terrell U32 curr = nextRankVal; 1110e0c1b49fSNick Terrell nextRankVal += wksp->rankStats[w] << (w+rescale); 1111e0c1b49fSNick Terrell rankVal0[w] = curr; 1112e0c1b49fSNick Terrell } } 1113e0c1b49fSNick Terrell { U32 const minBits = tableLog+1 - maxW; 1114e0c1b49fSNick Terrell U32 consumed; 1115e0c1b49fSNick Terrell for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) { 1116e0c1b49fSNick Terrell U32* const rankValPtr = wksp->rankVal[consumed]; 1117e0c1b49fSNick Terrell U32 w; 1118e0c1b49fSNick Terrell for (w = 1; w < maxW+1; w++) { 1119e0c1b49fSNick Terrell rankValPtr[w] = rankVal0[w] >> consumed; 1120e0c1b49fSNick Terrell } } } } 1121e0c1b49fSNick Terrell 1122e0c1b49fSNick Terrell HUF_fillDTableX2(dt, maxTableLog, 1123*2aa14b1aSNick Terrell wksp->sortedSymbol, 1124e0c1b49fSNick Terrell wksp->rankStart0, wksp->rankVal, maxW, 1125*2aa14b1aSNick Terrell tableLog+1); 1126e0c1b49fSNick Terrell 1127e0c1b49fSNick Terrell dtd.tableLog = (BYTE)maxTableLog; 1128e0c1b49fSNick Terrell dtd.tableType = 1; 1129e0c1b49fSNick Terrell ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); 1130e0c1b49fSNick Terrell return iSize; 1131e0c1b49fSNick Terrell } 1132e0c1b49fSNick Terrell 1133e0c1b49fSNick Terrell 1134e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE U32 1135e0c1b49fSNick Terrell HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) 1136e0c1b49fSNick Terrell { 1137e0c1b49fSNick Terrell size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ 1138*2aa14b1aSNick Terrell ZSTD_memcpy(op, &dt[val].sequence, 2); 1139e0c1b49fSNick Terrell BIT_skipBits(DStream, dt[val].nbBits); 1140e0c1b49fSNick Terrell return dt[val].length; 1141e0c1b49fSNick Terrell } 1142e0c1b49fSNick Terrell 1143e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE U32 1144e0c1b49fSNick Terrell HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) 1145e0c1b49fSNick Terrell { 1146e0c1b49fSNick Terrell size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ 1147*2aa14b1aSNick Terrell ZSTD_memcpy(op, &dt[val].sequence, 1); 1148*2aa14b1aSNick Terrell if (dt[val].length==1) { 1149*2aa14b1aSNick Terrell BIT_skipBits(DStream, dt[val].nbBits); 1150*2aa14b1aSNick Terrell } else { 1151e0c1b49fSNick Terrell if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { 1152e0c1b49fSNick Terrell BIT_skipBits(DStream, dt[val].nbBits); 1153e0c1b49fSNick Terrell if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) 1154e0c1b49fSNick Terrell /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ 1155e0c1b49fSNick Terrell DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); 1156*2aa14b1aSNick Terrell } 1157*2aa14b1aSNick Terrell } 1158e0c1b49fSNick Terrell return 1; 1159e0c1b49fSNick Terrell } 1160e0c1b49fSNick Terrell 1161e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ 1162e0c1b49fSNick Terrell ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 1163e0c1b49fSNick Terrell 1164e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ 1165e0c1b49fSNick Terrell if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 1166e0c1b49fSNick Terrell ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 1167e0c1b49fSNick Terrell 1168e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ 1169e0c1b49fSNick Terrell if (MEM_64bits()) \ 1170e0c1b49fSNick Terrell ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 1171e0c1b49fSNick Terrell 1172e0c1b49fSNick Terrell HINT_INLINE size_t 1173e0c1b49fSNick Terrell HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, 1174e0c1b49fSNick Terrell const HUF_DEltX2* const dt, const U32 dtLog) 1175e0c1b49fSNick Terrell { 1176e0c1b49fSNick Terrell BYTE* const pStart = p; 1177e0c1b49fSNick Terrell 1178e0c1b49fSNick Terrell /* up to 8 symbols at a time */ 1179*2aa14b1aSNick Terrell if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) { 1180*2aa14b1aSNick Terrell if (dtLog <= 11 && MEM_64bits()) { 1181*2aa14b1aSNick Terrell /* up to 10 symbols at a time */ 1182*2aa14b1aSNick Terrell while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) { 1183*2aa14b1aSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 1184*2aa14b1aSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 1185*2aa14b1aSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 1186*2aa14b1aSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 1187*2aa14b1aSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 1188*2aa14b1aSNick Terrell } 1189*2aa14b1aSNick Terrell } else { 1190*2aa14b1aSNick Terrell /* up to 8 symbols at a time */ 1191e0c1b49fSNick Terrell while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { 1192e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(p, bitDPtr); 1193e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(p, bitDPtr); 1194e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(p, bitDPtr); 1195e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 1196e0c1b49fSNick Terrell } 1197*2aa14b1aSNick Terrell } 1198*2aa14b1aSNick Terrell } else { 1199*2aa14b1aSNick Terrell BIT_reloadDStream(bitDPtr); 1200*2aa14b1aSNick Terrell } 1201e0c1b49fSNick Terrell 1202e0c1b49fSNick Terrell /* closer to end : up to 2 symbols at a time */ 1203*2aa14b1aSNick Terrell if ((size_t)(pEnd - p) >= 2) { 1204e0c1b49fSNick Terrell while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) 1205e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 1206e0c1b49fSNick Terrell 1207e0c1b49fSNick Terrell while (p <= pEnd-2) 1208e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ 1209*2aa14b1aSNick Terrell } 1210e0c1b49fSNick Terrell 1211e0c1b49fSNick Terrell if (p < pEnd) 1212e0c1b49fSNick Terrell p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); 1213e0c1b49fSNick Terrell 1214e0c1b49fSNick Terrell return p-pStart; 1215e0c1b49fSNick Terrell } 1216e0c1b49fSNick Terrell 1217e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t 1218e0c1b49fSNick Terrell HUF_decompress1X2_usingDTable_internal_body( 1219e0c1b49fSNick Terrell void* dst, size_t dstSize, 1220e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 1221e0c1b49fSNick Terrell const HUF_DTable* DTable) 1222e0c1b49fSNick Terrell { 1223e0c1b49fSNick Terrell BIT_DStream_t bitD; 1224e0c1b49fSNick Terrell 1225e0c1b49fSNick Terrell /* Init */ 1226e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); 1227e0c1b49fSNick Terrell 1228e0c1b49fSNick Terrell /* decode */ 1229e0c1b49fSNick Terrell { BYTE* const ostart = (BYTE*) dst; 1230e0c1b49fSNick Terrell BYTE* const oend = ostart + dstSize; 1231e0c1b49fSNick Terrell const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ 1232e0c1b49fSNick Terrell const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; 1233e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable); 1234e0c1b49fSNick Terrell HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); 1235e0c1b49fSNick Terrell } 1236e0c1b49fSNick Terrell 1237e0c1b49fSNick Terrell /* check */ 1238e0c1b49fSNick Terrell if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); 1239e0c1b49fSNick Terrell 1240e0c1b49fSNick Terrell /* decoded size */ 1241e0c1b49fSNick Terrell return dstSize; 1242e0c1b49fSNick Terrell } 1243e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t 1244e0c1b49fSNick Terrell HUF_decompress4X2_usingDTable_internal_body( 1245e0c1b49fSNick Terrell void* dst, size_t dstSize, 1246e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 1247e0c1b49fSNick Terrell const HUF_DTable* DTable) 1248e0c1b49fSNick Terrell { 1249e0c1b49fSNick Terrell if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ 1250e0c1b49fSNick Terrell 1251e0c1b49fSNick Terrell { const BYTE* const istart = (const BYTE*) cSrc; 1252e0c1b49fSNick Terrell BYTE* const ostart = (BYTE*) dst; 1253e0c1b49fSNick Terrell BYTE* const oend = ostart + dstSize; 1254e0c1b49fSNick Terrell BYTE* const olimit = oend - (sizeof(size_t)-1); 1255e0c1b49fSNick Terrell const void* const dtPtr = DTable+1; 1256e0c1b49fSNick Terrell const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; 1257e0c1b49fSNick Terrell 1258e0c1b49fSNick Terrell /* Init */ 1259e0c1b49fSNick Terrell BIT_DStream_t bitD1; 1260e0c1b49fSNick Terrell BIT_DStream_t bitD2; 1261e0c1b49fSNick Terrell BIT_DStream_t bitD3; 1262e0c1b49fSNick Terrell BIT_DStream_t bitD4; 1263e0c1b49fSNick Terrell size_t const length1 = MEM_readLE16(istart); 1264e0c1b49fSNick Terrell size_t const length2 = MEM_readLE16(istart+2); 1265e0c1b49fSNick Terrell size_t const length3 = MEM_readLE16(istart+4); 1266e0c1b49fSNick Terrell size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); 1267e0c1b49fSNick Terrell const BYTE* const istart1 = istart + 6; /* jumpTable */ 1268e0c1b49fSNick Terrell const BYTE* const istart2 = istart1 + length1; 1269e0c1b49fSNick Terrell const BYTE* const istart3 = istart2 + length2; 1270e0c1b49fSNick Terrell const BYTE* const istart4 = istart3 + length3; 1271e0c1b49fSNick Terrell size_t const segmentSize = (dstSize+3) / 4; 1272e0c1b49fSNick Terrell BYTE* const opStart2 = ostart + segmentSize; 1273e0c1b49fSNick Terrell BYTE* const opStart3 = opStart2 + segmentSize; 1274e0c1b49fSNick Terrell BYTE* const opStart4 = opStart3 + segmentSize; 1275e0c1b49fSNick Terrell BYTE* op1 = ostart; 1276e0c1b49fSNick Terrell BYTE* op2 = opStart2; 1277e0c1b49fSNick Terrell BYTE* op3 = opStart3; 1278e0c1b49fSNick Terrell BYTE* op4 = opStart4; 1279e0c1b49fSNick Terrell U32 endSignal = 1; 1280e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable); 1281e0c1b49fSNick Terrell U32 const dtLog = dtd.tableLog; 1282e0c1b49fSNick Terrell 1283e0c1b49fSNick Terrell if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ 1284*2aa14b1aSNick Terrell if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ 1285e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); 1286e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); 1287e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); 1288e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); 1289e0c1b49fSNick Terrell 1290e0c1b49fSNick Terrell /* 16-32 symbols per loop (4-8 symbols per stream) */ 1291*2aa14b1aSNick Terrell if ((size_t)(oend - op4) >= sizeof(size_t)) { 1292e0c1b49fSNick Terrell for ( ; (endSignal) & (op4 < olimit); ) { 1293e0c1b49fSNick Terrell #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) 1294e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 1295e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op1, &bitD1); 1296e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 1297e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op1, &bitD1); 1298e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 1299e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op2, &bitD2); 1300e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 1301e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op2, &bitD2); 1302e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; 1303e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; 1304e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 1305e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op3, &bitD3); 1306e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 1307e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op3, &bitD3); 1308e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 1309e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op4, &bitD4); 1310e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 1311e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op4, &bitD4); 1312e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; 1313e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; 1314e0c1b49fSNick Terrell #else 1315e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 1316e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 1317e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 1318e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 1319e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op1, &bitD1); 1320e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op2, &bitD2); 1321e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op3, &bitD3); 1322e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op4, &bitD4); 1323e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 1324e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 1325e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 1326e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 1327e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op1, &bitD1); 1328e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op2, &bitD2); 1329e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op3, &bitD3); 1330e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op4, &bitD4); 13310a8ea235SNathan Chancellor endSignal = (U32)LIKELY((U32) 1332e0c1b49fSNick Terrell (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) 1333e0c1b49fSNick Terrell & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) 1334e0c1b49fSNick Terrell & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) 1335e0c1b49fSNick Terrell & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); 1336e0c1b49fSNick Terrell #endif 1337e0c1b49fSNick Terrell } 1338*2aa14b1aSNick Terrell } 1339e0c1b49fSNick Terrell 1340e0c1b49fSNick Terrell /* check corruption */ 1341e0c1b49fSNick Terrell if (op1 > opStart2) return ERROR(corruption_detected); 1342e0c1b49fSNick Terrell if (op2 > opStart3) return ERROR(corruption_detected); 1343e0c1b49fSNick Terrell if (op3 > opStart4) return ERROR(corruption_detected); 1344e0c1b49fSNick Terrell /* note : op4 already verified within main loop */ 1345e0c1b49fSNick Terrell 1346e0c1b49fSNick Terrell /* finish bitStreams one by one */ 1347e0c1b49fSNick Terrell HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); 1348e0c1b49fSNick Terrell HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); 1349e0c1b49fSNick Terrell HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); 1350e0c1b49fSNick Terrell HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); 1351e0c1b49fSNick Terrell 1352e0c1b49fSNick Terrell /* check */ 1353e0c1b49fSNick Terrell { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); 1354e0c1b49fSNick Terrell if (!endCheck) return ERROR(corruption_detected); } 1355e0c1b49fSNick Terrell 1356e0c1b49fSNick Terrell /* decoded size */ 1357e0c1b49fSNick Terrell return dstSize; 1358e0c1b49fSNick Terrell } 1359e0c1b49fSNick Terrell } 1360e0c1b49fSNick Terrell 1361*2aa14b1aSNick Terrell #if HUF_NEED_BMI2_FUNCTION 1362*2aa14b1aSNick Terrell static BMI2_TARGET_ATTRIBUTE 1363*2aa14b1aSNick Terrell size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc, 1364*2aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable) { 1365*2aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); 1366*2aa14b1aSNick Terrell } 1367*2aa14b1aSNick Terrell #endif 1368*2aa14b1aSNick Terrell 1369*2aa14b1aSNick Terrell #if HUF_NEED_DEFAULT_FUNCTION 1370*2aa14b1aSNick Terrell static 1371*2aa14b1aSNick Terrell size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, 1372*2aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable) { 1373*2aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); 1374*2aa14b1aSNick Terrell } 1375*2aa14b1aSNick Terrell #endif 1376*2aa14b1aSNick Terrell 1377*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 1378*2aa14b1aSNick Terrell 1379*2aa14b1aSNick Terrell HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN; 1380*2aa14b1aSNick Terrell 1381*2aa14b1aSNick Terrell static HUF_ASM_X86_64_BMI2_ATTRS size_t 1382*2aa14b1aSNick Terrell HUF_decompress4X2_usingDTable_internal_bmi2_asm( 1383*2aa14b1aSNick Terrell void* dst, size_t dstSize, 1384*2aa14b1aSNick Terrell const void* cSrc, size_t cSrcSize, 1385*2aa14b1aSNick Terrell const HUF_DTable* DTable) { 1386*2aa14b1aSNick Terrell void const* dt = DTable + 1; 1387*2aa14b1aSNick Terrell const BYTE* const iend = (const BYTE*)cSrc + 6; 1388*2aa14b1aSNick Terrell BYTE* const oend = (BYTE*)dst + dstSize; 1389*2aa14b1aSNick Terrell HUF_DecompressAsmArgs args; 1390*2aa14b1aSNick Terrell { 1391*2aa14b1aSNick Terrell size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); 1392*2aa14b1aSNick Terrell FORWARD_IF_ERROR(ret, "Failed to init asm args"); 1393*2aa14b1aSNick Terrell if (ret != 0) 1394*2aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); 1395*2aa14b1aSNick Terrell } 1396*2aa14b1aSNick Terrell 1397*2aa14b1aSNick Terrell assert(args.ip[0] >= args.ilimit); 1398*2aa14b1aSNick Terrell HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args); 1399*2aa14b1aSNick Terrell 1400*2aa14b1aSNick Terrell /* note : op4 already verified within main loop */ 1401*2aa14b1aSNick Terrell assert(args.ip[0] >= iend); 1402*2aa14b1aSNick Terrell assert(args.ip[1] >= iend); 1403*2aa14b1aSNick Terrell assert(args.ip[2] >= iend); 1404*2aa14b1aSNick Terrell assert(args.ip[3] >= iend); 1405*2aa14b1aSNick Terrell assert(args.op[3] <= oend); 1406*2aa14b1aSNick Terrell (void)iend; 1407*2aa14b1aSNick Terrell 1408*2aa14b1aSNick Terrell /* finish bitStreams one by one */ 1409*2aa14b1aSNick Terrell { 1410*2aa14b1aSNick Terrell size_t const segmentSize = (dstSize+3) / 4; 1411*2aa14b1aSNick Terrell BYTE* segmentEnd = (BYTE*)dst; 1412*2aa14b1aSNick Terrell int i; 1413*2aa14b1aSNick Terrell for (i = 0; i < 4; ++i) { 1414*2aa14b1aSNick Terrell BIT_DStream_t bit; 1415*2aa14b1aSNick Terrell if (segmentSize <= (size_t)(oend - segmentEnd)) 1416*2aa14b1aSNick Terrell segmentEnd += segmentSize; 1417*2aa14b1aSNick Terrell else 1418*2aa14b1aSNick Terrell segmentEnd = oend; 1419*2aa14b1aSNick Terrell FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption"); 1420*2aa14b1aSNick Terrell args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG); 1421*2aa14b1aSNick Terrell if (args.op[i] != segmentEnd) 1422*2aa14b1aSNick Terrell return ERROR(corruption_detected); 1423*2aa14b1aSNick Terrell } 1424*2aa14b1aSNick Terrell } 1425*2aa14b1aSNick Terrell 1426*2aa14b1aSNick Terrell /* decoded size */ 1427*2aa14b1aSNick Terrell return dstSize; 1428*2aa14b1aSNick Terrell } 1429*2aa14b1aSNick Terrell #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ 1430*2aa14b1aSNick Terrell 1431*2aa14b1aSNick Terrell static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, 1432*2aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable, int bmi2) 1433*2aa14b1aSNick Terrell { 1434*2aa14b1aSNick Terrell #if DYNAMIC_BMI2 1435*2aa14b1aSNick Terrell if (bmi2) { 1436*2aa14b1aSNick Terrell # if ZSTD_ENABLE_ASM_X86_64_BMI2 1437*2aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); 1438*2aa14b1aSNick Terrell # else 1439*2aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); 1440*2aa14b1aSNick Terrell # endif 1441*2aa14b1aSNick Terrell } 1442*2aa14b1aSNick Terrell #else 1443*2aa14b1aSNick Terrell (void)bmi2; 1444*2aa14b1aSNick Terrell #endif 1445*2aa14b1aSNick Terrell 1446*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) 1447*2aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); 1448*2aa14b1aSNick Terrell #else 1449*2aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); 1450*2aa14b1aSNick Terrell #endif 1451*2aa14b1aSNick Terrell } 1452*2aa14b1aSNick Terrell 1453e0c1b49fSNick Terrell HUF_DGEN(HUF_decompress1X2_usingDTable_internal) 1454e0c1b49fSNick Terrell 1455e0c1b49fSNick Terrell size_t HUF_decompress1X2_usingDTable( 1456e0c1b49fSNick Terrell void* dst, size_t dstSize, 1457e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 1458e0c1b49fSNick Terrell const HUF_DTable* DTable) 1459e0c1b49fSNick Terrell { 1460e0c1b49fSNick Terrell DTableDesc dtd = HUF_getDTableDesc(DTable); 1461e0c1b49fSNick Terrell if (dtd.tableType != 1) return ERROR(GENERIC); 1462e0c1b49fSNick Terrell return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1463e0c1b49fSNick Terrell } 1464e0c1b49fSNick Terrell 1465e0c1b49fSNick Terrell size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, 1466e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 1467e0c1b49fSNick Terrell void* workSpace, size_t wkspSize) 1468e0c1b49fSNick Terrell { 1469e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*) cSrc; 1470e0c1b49fSNick Terrell 1471e0c1b49fSNick Terrell size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, 1472e0c1b49fSNick Terrell workSpace, wkspSize); 1473e0c1b49fSNick Terrell if (HUF_isError(hSize)) return hSize; 1474e0c1b49fSNick Terrell if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 1475e0c1b49fSNick Terrell ip += hSize; cSrcSize -= hSize; 1476e0c1b49fSNick Terrell 1477e0c1b49fSNick Terrell return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); 1478e0c1b49fSNick Terrell } 1479e0c1b49fSNick Terrell 1480e0c1b49fSNick Terrell 1481e0c1b49fSNick Terrell size_t HUF_decompress4X2_usingDTable( 1482e0c1b49fSNick Terrell void* dst, size_t dstSize, 1483e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 1484e0c1b49fSNick Terrell const HUF_DTable* DTable) 1485e0c1b49fSNick Terrell { 1486e0c1b49fSNick Terrell DTableDesc dtd = HUF_getDTableDesc(DTable); 1487e0c1b49fSNick Terrell if (dtd.tableType != 1) return ERROR(GENERIC); 1488e0c1b49fSNick Terrell return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1489e0c1b49fSNick Terrell } 1490e0c1b49fSNick Terrell 1491e0c1b49fSNick Terrell static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, 1492e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 1493e0c1b49fSNick Terrell void* workSpace, size_t wkspSize, int bmi2) 1494e0c1b49fSNick Terrell { 1495e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*) cSrc; 1496e0c1b49fSNick Terrell 1497e0c1b49fSNick Terrell size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, 1498e0c1b49fSNick Terrell workSpace, wkspSize); 1499e0c1b49fSNick Terrell if (HUF_isError(hSize)) return hSize; 1500e0c1b49fSNick Terrell if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 1501e0c1b49fSNick Terrell ip += hSize; cSrcSize -= hSize; 1502e0c1b49fSNick Terrell 1503e0c1b49fSNick Terrell return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 1504e0c1b49fSNick Terrell } 1505e0c1b49fSNick Terrell 1506e0c1b49fSNick Terrell size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 1507e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 1508e0c1b49fSNick Terrell void* workSpace, size_t wkspSize) 1509e0c1b49fSNick Terrell { 1510e0c1b49fSNick Terrell return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); 1511e0c1b49fSNick Terrell } 1512e0c1b49fSNick Terrell 1513e0c1b49fSNick Terrell 1514e0c1b49fSNick Terrell #endif /* HUF_FORCE_DECOMPRESS_X1 */ 1515e0c1b49fSNick Terrell 1516e0c1b49fSNick Terrell 1517e0c1b49fSNick Terrell /* ***********************************/ 1518e0c1b49fSNick Terrell /* Universal decompression selectors */ 1519e0c1b49fSNick Terrell /* ***********************************/ 1520e0c1b49fSNick Terrell 1521e0c1b49fSNick Terrell size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, 1522e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 1523e0c1b49fSNick Terrell const HUF_DTable* DTable) 1524e0c1b49fSNick Terrell { 1525e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable); 1526e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1) 1527e0c1b49fSNick Terrell (void)dtd; 1528e0c1b49fSNick Terrell assert(dtd.tableType == 0); 1529e0c1b49fSNick Terrell return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1530e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2) 1531e0c1b49fSNick Terrell (void)dtd; 1532e0c1b49fSNick Terrell assert(dtd.tableType == 1); 1533e0c1b49fSNick Terrell return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1534e0c1b49fSNick Terrell #else 1535e0c1b49fSNick Terrell return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 1536e0c1b49fSNick Terrell HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1537e0c1b49fSNick Terrell #endif 1538e0c1b49fSNick Terrell } 1539e0c1b49fSNick Terrell 1540e0c1b49fSNick Terrell size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, 1541e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 1542e0c1b49fSNick Terrell const HUF_DTable* DTable) 1543e0c1b49fSNick Terrell { 1544e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable); 1545e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1) 1546e0c1b49fSNick Terrell (void)dtd; 1547e0c1b49fSNick Terrell assert(dtd.tableType == 0); 1548e0c1b49fSNick Terrell return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1549e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2) 1550e0c1b49fSNick Terrell (void)dtd; 1551e0c1b49fSNick Terrell assert(dtd.tableType == 1); 1552e0c1b49fSNick Terrell return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1553e0c1b49fSNick Terrell #else 1554e0c1b49fSNick Terrell return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 1555e0c1b49fSNick Terrell HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1556e0c1b49fSNick Terrell #endif 1557e0c1b49fSNick Terrell } 1558e0c1b49fSNick Terrell 1559e0c1b49fSNick Terrell 1560e0c1b49fSNick Terrell #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) 1561e0c1b49fSNick Terrell typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; 1562*2aa14b1aSNick Terrell static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] = 1563e0c1b49fSNick Terrell { 1564e0c1b49fSNick Terrell /* single, double, quad */ 1565*2aa14b1aSNick Terrell {{0,0}, {1,1}}, /* Q==0 : impossible */ 1566*2aa14b1aSNick Terrell {{0,0}, {1,1}}, /* Q==1 : impossible */ 1567*2aa14b1aSNick Terrell {{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */ 1568*2aa14b1aSNick Terrell {{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */ 1569*2aa14b1aSNick Terrell {{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */ 1570*2aa14b1aSNick Terrell {{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */ 1571*2aa14b1aSNick Terrell {{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */ 1572*2aa14b1aSNick Terrell {{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */ 1573*2aa14b1aSNick Terrell {{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */ 1574*2aa14b1aSNick Terrell {{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */ 1575*2aa14b1aSNick Terrell {{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */ 1576*2aa14b1aSNick Terrell {{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */ 1577*2aa14b1aSNick Terrell {{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */ 1578*2aa14b1aSNick Terrell {{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */ 1579*2aa14b1aSNick Terrell {{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */ 1580*2aa14b1aSNick Terrell {{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */ 1581e0c1b49fSNick Terrell }; 1582e0c1b49fSNick Terrell #endif 1583e0c1b49fSNick Terrell 1584e0c1b49fSNick Terrell /* HUF_selectDecoder() : 1585e0c1b49fSNick Terrell * Tells which decoder is likely to decode faster, 1586e0c1b49fSNick Terrell * based on a set of pre-computed metrics. 1587e0c1b49fSNick Terrell * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . 1588e0c1b49fSNick Terrell * Assumption : 0 < dstSize <= 128 KB */ 1589e0c1b49fSNick Terrell U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) 1590e0c1b49fSNick Terrell { 1591e0c1b49fSNick Terrell assert(dstSize > 0); 1592e0c1b49fSNick Terrell assert(dstSize <= 128*1024); 1593e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1) 1594e0c1b49fSNick Terrell (void)dstSize; 1595e0c1b49fSNick Terrell (void)cSrcSize; 1596e0c1b49fSNick Terrell return 0; 1597e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2) 1598e0c1b49fSNick Terrell (void)dstSize; 1599e0c1b49fSNick Terrell (void)cSrcSize; 1600e0c1b49fSNick Terrell return 1; 1601e0c1b49fSNick Terrell #else 1602e0c1b49fSNick Terrell /* decoder timing evaluation */ 1603e0c1b49fSNick Terrell { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ 1604e0c1b49fSNick Terrell U32 const D256 = (U32)(dstSize >> 8); 1605e0c1b49fSNick Terrell U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); 1606e0c1b49fSNick Terrell U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); 1607*2aa14b1aSNick Terrell DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */ 1608e0c1b49fSNick Terrell return DTime1 < DTime0; 1609e0c1b49fSNick Terrell } 1610e0c1b49fSNick Terrell #endif 1611e0c1b49fSNick Terrell } 1612e0c1b49fSNick Terrell 1613e0c1b49fSNick Terrell 1614e0c1b49fSNick Terrell size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, 1615e0c1b49fSNick Terrell size_t dstSize, const void* cSrc, 1616e0c1b49fSNick Terrell size_t cSrcSize, void* workSpace, 1617e0c1b49fSNick Terrell size_t wkspSize) 1618e0c1b49fSNick Terrell { 1619e0c1b49fSNick Terrell /* validation checks */ 1620e0c1b49fSNick Terrell if (dstSize == 0) return ERROR(dstSize_tooSmall); 1621e0c1b49fSNick Terrell if (cSrcSize == 0) return ERROR(corruption_detected); 1622e0c1b49fSNick Terrell 1623e0c1b49fSNick Terrell { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1624e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1) 1625e0c1b49fSNick Terrell (void)algoNb; 1626e0c1b49fSNick Terrell assert(algoNb == 0); 1627e0c1b49fSNick Terrell return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1628e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2) 1629e0c1b49fSNick Terrell (void)algoNb; 1630e0c1b49fSNick Terrell assert(algoNb == 1); 1631e0c1b49fSNick Terrell return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1632e0c1b49fSNick Terrell #else 1633e0c1b49fSNick Terrell return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1634e0c1b49fSNick Terrell cSrcSize, workSpace, wkspSize): 1635e0c1b49fSNick Terrell HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1636e0c1b49fSNick Terrell #endif 1637e0c1b49fSNick Terrell } 1638e0c1b49fSNick Terrell } 1639e0c1b49fSNick Terrell 1640e0c1b49fSNick Terrell size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 1641e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, 1642e0c1b49fSNick Terrell void* workSpace, size_t wkspSize) 1643e0c1b49fSNick Terrell { 1644e0c1b49fSNick Terrell /* validation checks */ 1645e0c1b49fSNick Terrell if (dstSize == 0) return ERROR(dstSize_tooSmall); 1646e0c1b49fSNick Terrell if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ 1647e0c1b49fSNick Terrell if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ 1648e0c1b49fSNick Terrell if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ 1649e0c1b49fSNick Terrell 1650e0c1b49fSNick Terrell { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1651e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1) 1652e0c1b49fSNick Terrell (void)algoNb; 1653e0c1b49fSNick Terrell assert(algoNb == 0); 1654e0c1b49fSNick Terrell return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, 1655e0c1b49fSNick Terrell cSrcSize, workSpace, wkspSize); 1656e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2) 1657e0c1b49fSNick Terrell (void)algoNb; 1658e0c1b49fSNick Terrell assert(algoNb == 1); 1659e0c1b49fSNick Terrell return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1660e0c1b49fSNick Terrell cSrcSize, workSpace, wkspSize); 1661e0c1b49fSNick Terrell #else 1662e0c1b49fSNick Terrell return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1663e0c1b49fSNick Terrell cSrcSize, workSpace, wkspSize): 1664e0c1b49fSNick Terrell HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, 1665e0c1b49fSNick Terrell cSrcSize, workSpace, wkspSize); 1666e0c1b49fSNick Terrell #endif 1667e0c1b49fSNick Terrell } 1668e0c1b49fSNick Terrell } 1669e0c1b49fSNick Terrell 1670e0c1b49fSNick Terrell 1671e0c1b49fSNick Terrell size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1672e0c1b49fSNick Terrell { 1673e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable); 1674e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1) 1675e0c1b49fSNick Terrell (void)dtd; 1676e0c1b49fSNick Terrell assert(dtd.tableType == 0); 1677e0c1b49fSNick Terrell return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1678e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2) 1679e0c1b49fSNick Terrell (void)dtd; 1680e0c1b49fSNick Terrell assert(dtd.tableType == 1); 1681e0c1b49fSNick Terrell return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1682e0c1b49fSNick Terrell #else 1683e0c1b49fSNick Terrell return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1684e0c1b49fSNick Terrell HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1685e0c1b49fSNick Terrell #endif 1686e0c1b49fSNick Terrell } 1687e0c1b49fSNick Terrell 1688e0c1b49fSNick Terrell #ifndef HUF_FORCE_DECOMPRESS_X2 1689e0c1b49fSNick Terrell size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1690e0c1b49fSNick Terrell { 1691e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*) cSrc; 1692e0c1b49fSNick Terrell 1693e0c1b49fSNick Terrell size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1694e0c1b49fSNick Terrell if (HUF_isError(hSize)) return hSize; 1695e0c1b49fSNick Terrell if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 1696e0c1b49fSNick Terrell ip += hSize; cSrcSize -= hSize; 1697e0c1b49fSNick Terrell 1698e0c1b49fSNick Terrell return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 1699e0c1b49fSNick Terrell } 1700e0c1b49fSNick Terrell #endif 1701e0c1b49fSNick Terrell 1702e0c1b49fSNick Terrell size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1703e0c1b49fSNick Terrell { 1704e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable); 1705e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1) 1706e0c1b49fSNick Terrell (void)dtd; 1707e0c1b49fSNick Terrell assert(dtd.tableType == 0); 1708e0c1b49fSNick Terrell return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1709e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2) 1710e0c1b49fSNick Terrell (void)dtd; 1711e0c1b49fSNick Terrell assert(dtd.tableType == 1); 1712e0c1b49fSNick Terrell return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1713e0c1b49fSNick Terrell #else 1714e0c1b49fSNick Terrell return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1715e0c1b49fSNick Terrell HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1716e0c1b49fSNick Terrell #endif 1717e0c1b49fSNick Terrell } 1718e0c1b49fSNick Terrell 1719e0c1b49fSNick Terrell size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1720e0c1b49fSNick Terrell { 1721e0c1b49fSNick Terrell /* validation checks */ 1722e0c1b49fSNick Terrell if (dstSize == 0) return ERROR(dstSize_tooSmall); 1723e0c1b49fSNick Terrell if (cSrcSize == 0) return ERROR(corruption_detected); 1724e0c1b49fSNick Terrell 1725e0c1b49fSNick Terrell { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1726e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1) 1727e0c1b49fSNick Terrell (void)algoNb; 1728e0c1b49fSNick Terrell assert(algoNb == 0); 1729e0c1b49fSNick Terrell return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1730e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2) 1731e0c1b49fSNick Terrell (void)algoNb; 1732e0c1b49fSNick Terrell assert(algoNb == 1); 1733e0c1b49fSNick Terrell return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1734e0c1b49fSNick Terrell #else 1735e0c1b49fSNick Terrell return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : 1736e0c1b49fSNick Terrell HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1737e0c1b49fSNick Terrell #endif 1738e0c1b49fSNick Terrell } 1739e0c1b49fSNick Terrell } 1740e0c1b49fSNick Terrell 1741