1e0c1b49fSNick Terrell /* ******************************************************************
2e0c1b49fSNick Terrell * huff0 huffman decoder,
3e0c1b49fSNick Terrell * part of Finite State Entropy library
4e0c1b49fSNick Terrell * Copyright (c) Yann Collet, Facebook, Inc.
5e0c1b49fSNick Terrell *
6e0c1b49fSNick Terrell * You can contact the author at :
7e0c1b49fSNick Terrell * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
8e0c1b49fSNick Terrell *
9e0c1b49fSNick Terrell * This source code is licensed under both the BSD-style license (found in the
10e0c1b49fSNick Terrell * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11e0c1b49fSNick Terrell * in the COPYING file in the root directory of this source tree).
12e0c1b49fSNick Terrell * You may select, at your option, one of the above-listed licenses.
13e0c1b49fSNick Terrell ****************************************************************** */
14e0c1b49fSNick Terrell
15e0c1b49fSNick Terrell /* **************************************************************
16e0c1b49fSNick Terrell * Dependencies
17e0c1b49fSNick Terrell ****************************************************************/
18e0c1b49fSNick Terrell #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
19e0c1b49fSNick Terrell #include "../common/compiler.h"
20e0c1b49fSNick Terrell #include "../common/bitstream.h" /* BIT_* */
21e0c1b49fSNick Terrell #include "../common/fse.h" /* to compress headers */
22e0c1b49fSNick Terrell #define HUF_STATIC_LINKING_ONLY
23e0c1b49fSNick Terrell #include "../common/huf.h"
24e0c1b49fSNick Terrell #include "../common/error_private.h"
252aa14b1aSNick Terrell #include "../common/zstd_internal.h"
262aa14b1aSNick Terrell
272aa14b1aSNick Terrell /* **************************************************************
282aa14b1aSNick Terrell * Constants
292aa14b1aSNick Terrell ****************************************************************/
302aa14b1aSNick Terrell
312aa14b1aSNick Terrell #define HUF_DECODER_FAST_TABLELOG 11
32e0c1b49fSNick Terrell
33e0c1b49fSNick Terrell /* **************************************************************
34e0c1b49fSNick Terrell * Macros
35e0c1b49fSNick Terrell ****************************************************************/
36e0c1b49fSNick Terrell
37e0c1b49fSNick Terrell /* These two optional macros force the use one way or another of the two
38e0c1b49fSNick Terrell * Huffman decompression implementations. You can't force in both directions
39e0c1b49fSNick Terrell * at the same time.
40e0c1b49fSNick Terrell */
41e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1) && \
42e0c1b49fSNick Terrell defined(HUF_FORCE_DECOMPRESS_X2)
43e0c1b49fSNick Terrell #error "Cannot force the use of the X1 and X2 decoders at the same time!"
44e0c1b49fSNick Terrell #endif
45e0c1b49fSNick Terrell
462aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
472aa14b1aSNick Terrell # define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
482aa14b1aSNick Terrell #else
492aa14b1aSNick Terrell # define HUF_ASM_X86_64_BMI2_ATTRS
502aa14b1aSNick Terrell #endif
512aa14b1aSNick Terrell
522aa14b1aSNick Terrell #define HUF_EXTERN_C
532aa14b1aSNick Terrell #define HUF_ASM_DECL HUF_EXTERN_C
542aa14b1aSNick Terrell
552aa14b1aSNick Terrell #if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
562aa14b1aSNick Terrell # define HUF_NEED_BMI2_FUNCTION 1
572aa14b1aSNick Terrell #else
582aa14b1aSNick Terrell # define HUF_NEED_BMI2_FUNCTION 0
592aa14b1aSNick Terrell #endif
602aa14b1aSNick Terrell
612aa14b1aSNick Terrell #if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
622aa14b1aSNick Terrell # define HUF_NEED_DEFAULT_FUNCTION 1
632aa14b1aSNick Terrell #else
642aa14b1aSNick Terrell # define HUF_NEED_DEFAULT_FUNCTION 0
652aa14b1aSNick Terrell #endif
66e0c1b49fSNick Terrell
67e0c1b49fSNick Terrell /* **************************************************************
68e0c1b49fSNick Terrell * Error Management
69e0c1b49fSNick Terrell ****************************************************************/
70e0c1b49fSNick Terrell #define HUF_isError ERR_isError
71e0c1b49fSNick Terrell
72e0c1b49fSNick Terrell
73e0c1b49fSNick Terrell /* **************************************************************
74e0c1b49fSNick Terrell * Byte alignment for workSpace management
75e0c1b49fSNick Terrell ****************************************************************/
76e0c1b49fSNick Terrell #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
77e0c1b49fSNick Terrell #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
78e0c1b49fSNick Terrell
79e0c1b49fSNick Terrell
80e0c1b49fSNick Terrell /* **************************************************************
81e0c1b49fSNick Terrell * BMI2 Variant Wrappers
82e0c1b49fSNick Terrell ****************************************************************/
83e0c1b49fSNick Terrell #if DYNAMIC_BMI2
84e0c1b49fSNick Terrell
85e0c1b49fSNick Terrell #define HUF_DGEN(fn) \
86e0c1b49fSNick Terrell \
87e0c1b49fSNick Terrell static size_t fn##_default( \
88e0c1b49fSNick Terrell void* dst, size_t dstSize, \
89e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, \
90e0c1b49fSNick Terrell const HUF_DTable* DTable) \
91e0c1b49fSNick Terrell { \
92e0c1b49fSNick Terrell return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
93e0c1b49fSNick Terrell } \
94e0c1b49fSNick Terrell \
952aa14b1aSNick Terrell static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
96e0c1b49fSNick Terrell void* dst, size_t dstSize, \
97e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize, \
98e0c1b49fSNick Terrell const HUF_DTable* DTable) \
99e0c1b49fSNick Terrell { \
100e0c1b49fSNick Terrell return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
101e0c1b49fSNick Terrell } \
102e0c1b49fSNick Terrell \
103e0c1b49fSNick Terrell static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
104e0c1b49fSNick Terrell size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
105e0c1b49fSNick Terrell { \
106e0c1b49fSNick Terrell if (bmi2) { \
107e0c1b49fSNick Terrell return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
108e0c1b49fSNick Terrell } \
109e0c1b49fSNick Terrell return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
110e0c1b49fSNick Terrell }
111e0c1b49fSNick Terrell
112e0c1b49fSNick Terrell #else
113e0c1b49fSNick Terrell
114e0c1b49fSNick Terrell #define HUF_DGEN(fn) \
115e0c1b49fSNick Terrell static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
116e0c1b49fSNick Terrell size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
117e0c1b49fSNick Terrell { \
118e0c1b49fSNick Terrell (void)bmi2; \
119e0c1b49fSNick Terrell return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
120e0c1b49fSNick Terrell }
121e0c1b49fSNick Terrell
122e0c1b49fSNick Terrell #endif
123e0c1b49fSNick Terrell
124e0c1b49fSNick Terrell
125e0c1b49fSNick Terrell /*-***************************/
126e0c1b49fSNick Terrell /* generic DTableDesc */
127e0c1b49fSNick Terrell /*-***************************/
128e0c1b49fSNick Terrell typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
129e0c1b49fSNick Terrell
HUF_getDTableDesc(const HUF_DTable * table)130e0c1b49fSNick Terrell static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
131e0c1b49fSNick Terrell {
132e0c1b49fSNick Terrell DTableDesc dtd;
133e0c1b49fSNick Terrell ZSTD_memcpy(&dtd, table, sizeof(dtd));
134e0c1b49fSNick Terrell return dtd;
135e0c1b49fSNick Terrell }
136e0c1b49fSNick Terrell
1372aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2
1382aa14b1aSNick Terrell
HUF_initDStream(BYTE const * ip)1392aa14b1aSNick Terrell static size_t HUF_initDStream(BYTE const* ip) {
1402aa14b1aSNick Terrell BYTE const lastByte = ip[7];
1412aa14b1aSNick Terrell size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
1422aa14b1aSNick Terrell size_t const value = MEM_readLEST(ip) | 1;
1432aa14b1aSNick Terrell assert(bitsConsumed <= 8);
1442aa14b1aSNick Terrell return value << bitsConsumed;
1452aa14b1aSNick Terrell }
1462aa14b1aSNick Terrell typedef struct {
1472aa14b1aSNick Terrell BYTE const* ip[4];
1482aa14b1aSNick Terrell BYTE* op[4];
1492aa14b1aSNick Terrell U64 bits[4];
1502aa14b1aSNick Terrell void const* dt;
1512aa14b1aSNick Terrell BYTE const* ilimit;
1522aa14b1aSNick Terrell BYTE* oend;
1532aa14b1aSNick Terrell BYTE const* iend[4];
1542aa14b1aSNick Terrell } HUF_DecompressAsmArgs;
1552aa14b1aSNick Terrell
1562aa14b1aSNick Terrell /*
1572aa14b1aSNick Terrell * Initializes args for the asm decoding loop.
1582aa14b1aSNick Terrell * @returns 0 on success
1592aa14b1aSNick Terrell * 1 if the fallback implementation should be used.
1602aa14b1aSNick Terrell * Or an error code on failure.
1612aa14b1aSNick Terrell */
HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs * args,void * dst,size_t dstSize,void const * src,size_t srcSize,const HUF_DTable * DTable)1622aa14b1aSNick Terrell static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
1632aa14b1aSNick Terrell {
1642aa14b1aSNick Terrell void const* dt = DTable + 1;
1652aa14b1aSNick Terrell U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
1662aa14b1aSNick Terrell
1672aa14b1aSNick Terrell const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
1682aa14b1aSNick Terrell
1692aa14b1aSNick Terrell BYTE* const oend = (BYTE*)dst + dstSize;
1702aa14b1aSNick Terrell
1712aa14b1aSNick Terrell /* The following condition is false on x32 platform,
1722aa14b1aSNick Terrell * but HUF_asm is not compatible with this ABI */
1732aa14b1aSNick Terrell if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
1742aa14b1aSNick Terrell
1752aa14b1aSNick Terrell /* strict minimum : jump table + 1 byte per stream */
1762aa14b1aSNick Terrell if (srcSize < 10)
1772aa14b1aSNick Terrell return ERROR(corruption_detected);
1782aa14b1aSNick Terrell
1792aa14b1aSNick Terrell /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
1802aa14b1aSNick Terrell * If table log is not correct at this point, fallback to the old decoder.
1812aa14b1aSNick Terrell * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
1822aa14b1aSNick Terrell */
1832aa14b1aSNick Terrell if (dtLog != HUF_DECODER_FAST_TABLELOG)
1842aa14b1aSNick Terrell return 1;
1852aa14b1aSNick Terrell
1862aa14b1aSNick Terrell /* Read the jump table. */
1872aa14b1aSNick Terrell {
1882aa14b1aSNick Terrell const BYTE* const istart = (const BYTE*)src;
1892aa14b1aSNick Terrell size_t const length1 = MEM_readLE16(istart);
1902aa14b1aSNick Terrell size_t const length2 = MEM_readLE16(istart+2);
1912aa14b1aSNick Terrell size_t const length3 = MEM_readLE16(istart+4);
1922aa14b1aSNick Terrell size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
1932aa14b1aSNick Terrell args->iend[0] = istart + 6; /* jumpTable */
1942aa14b1aSNick Terrell args->iend[1] = args->iend[0] + length1;
1952aa14b1aSNick Terrell args->iend[2] = args->iend[1] + length2;
1962aa14b1aSNick Terrell args->iend[3] = args->iend[2] + length3;
1972aa14b1aSNick Terrell
1982aa14b1aSNick Terrell /* HUF_initDStream() requires this, and this small of an input
1992aa14b1aSNick Terrell * won't benefit from the ASM loop anyways.
2002aa14b1aSNick Terrell * length1 must be >= 16 so that ip[0] >= ilimit before the loop
2012aa14b1aSNick Terrell * starts.
2022aa14b1aSNick Terrell */
2032aa14b1aSNick Terrell if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
2042aa14b1aSNick Terrell return 1;
2052aa14b1aSNick Terrell if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
2062aa14b1aSNick Terrell }
2072aa14b1aSNick Terrell /* ip[] contains the position that is currently loaded into bits[]. */
2082aa14b1aSNick Terrell args->ip[0] = args->iend[1] - sizeof(U64);
2092aa14b1aSNick Terrell args->ip[1] = args->iend[2] - sizeof(U64);
2102aa14b1aSNick Terrell args->ip[2] = args->iend[3] - sizeof(U64);
2112aa14b1aSNick Terrell args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
2122aa14b1aSNick Terrell
2132aa14b1aSNick Terrell /* op[] contains the output pointers. */
2142aa14b1aSNick Terrell args->op[0] = (BYTE*)dst;
2152aa14b1aSNick Terrell args->op[1] = args->op[0] + (dstSize+3)/4;
2162aa14b1aSNick Terrell args->op[2] = args->op[1] + (dstSize+3)/4;
2172aa14b1aSNick Terrell args->op[3] = args->op[2] + (dstSize+3)/4;
2182aa14b1aSNick Terrell
2192aa14b1aSNick Terrell /* No point to call the ASM loop for tiny outputs. */
2202aa14b1aSNick Terrell if (args->op[3] >= oend)
2212aa14b1aSNick Terrell return 1;
2222aa14b1aSNick Terrell
2232aa14b1aSNick Terrell /* bits[] is the bit container.
2242aa14b1aSNick Terrell * It is read from the MSB down to the LSB.
2252aa14b1aSNick Terrell * It is shifted left as it is read, and zeros are
2262aa14b1aSNick Terrell * shifted in. After the lowest valid bit a 1 is
2272aa14b1aSNick Terrell * set, so that CountTrailingZeros(bits[]) can be used
2282aa14b1aSNick Terrell * to count how many bits we've consumed.
2292aa14b1aSNick Terrell */
2302aa14b1aSNick Terrell args->bits[0] = HUF_initDStream(args->ip[0]);
2312aa14b1aSNick Terrell args->bits[1] = HUF_initDStream(args->ip[1]);
2322aa14b1aSNick Terrell args->bits[2] = HUF_initDStream(args->ip[2]);
2332aa14b1aSNick Terrell args->bits[3] = HUF_initDStream(args->ip[3]);
2342aa14b1aSNick Terrell
2352aa14b1aSNick Terrell /* If ip[] >= ilimit, it is guaranteed to be safe to
2362aa14b1aSNick Terrell * reload bits[]. It may be beyond its section, but is
2372aa14b1aSNick Terrell * guaranteed to be valid (>= istart).
2382aa14b1aSNick Terrell */
2392aa14b1aSNick Terrell args->ilimit = ilimit;
2402aa14b1aSNick Terrell
2412aa14b1aSNick Terrell args->oend = oend;
2422aa14b1aSNick Terrell args->dt = dt;
2432aa14b1aSNick Terrell
2442aa14b1aSNick Terrell return 0;
2452aa14b1aSNick Terrell }
2462aa14b1aSNick Terrell
HUF_initRemainingDStream(BIT_DStream_t * bit,HUF_DecompressAsmArgs const * args,int stream,BYTE * segmentEnd)2472aa14b1aSNick Terrell static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
2482aa14b1aSNick Terrell {
2492aa14b1aSNick Terrell /* Validate that we haven't overwritten. */
2502aa14b1aSNick Terrell if (args->op[stream] > segmentEnd)
2512aa14b1aSNick Terrell return ERROR(corruption_detected);
2522aa14b1aSNick Terrell /* Validate that we haven't read beyond iend[].
2532aa14b1aSNick Terrell * Note that ip[] may be < iend[] because the MSB is
2542aa14b1aSNick Terrell * the next bit to read, and we may have consumed 100%
2552aa14b1aSNick Terrell * of the stream, so down to iend[i] - 8 is valid.
2562aa14b1aSNick Terrell */
2572aa14b1aSNick Terrell if (args->ip[stream] < args->iend[stream] - 8)
2582aa14b1aSNick Terrell return ERROR(corruption_detected);
2592aa14b1aSNick Terrell
2602aa14b1aSNick Terrell /* Construct the BIT_DStream_t. */
2612aa14b1aSNick Terrell bit->bitContainer = MEM_readLE64(args->ip[stream]);
2622aa14b1aSNick Terrell bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
2632aa14b1aSNick Terrell bit->start = (const char*)args->iend[0];
2642aa14b1aSNick Terrell bit->limitPtr = bit->start + sizeof(size_t);
2652aa14b1aSNick Terrell bit->ptr = (const char*)args->ip[stream];
2662aa14b1aSNick Terrell
2672aa14b1aSNick Terrell return 0;
2682aa14b1aSNick Terrell }
2692aa14b1aSNick Terrell #endif
2702aa14b1aSNick Terrell
271e0c1b49fSNick Terrell
272e0c1b49fSNick Terrell #ifndef HUF_FORCE_DECOMPRESS_X2
273e0c1b49fSNick Terrell
274e0c1b49fSNick Terrell /*-***************************/
275e0c1b49fSNick Terrell /* single-symbol decoding */
276e0c1b49fSNick Terrell /*-***************************/
2772aa14b1aSNick Terrell typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */
278e0c1b49fSNick Terrell
279e0c1b49fSNick Terrell /*
280e0c1b49fSNick Terrell * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
281e0c1b49fSNick Terrell * a time.
282e0c1b49fSNick Terrell */
HUF_DEltX1_set4(BYTE symbol,BYTE nbBits)283e0c1b49fSNick Terrell static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
284e0c1b49fSNick Terrell U64 D4;
285e0c1b49fSNick Terrell if (MEM_isLittleEndian()) {
286e0c1b49fSNick Terrell D4 = (symbol << 8) + nbBits;
2872aa14b1aSNick Terrell } else {
2882aa14b1aSNick Terrell D4 = symbol + (nbBits << 8);
289e0c1b49fSNick Terrell }
290e0c1b49fSNick Terrell D4 *= 0x0001000100010001ULL;
291e0c1b49fSNick Terrell return D4;
292e0c1b49fSNick Terrell }
293e0c1b49fSNick Terrell
2942aa14b1aSNick Terrell /*
2952aa14b1aSNick Terrell * Increase the tableLog to targetTableLog and rescales the stats.
2962aa14b1aSNick Terrell * If tableLog > targetTableLog this is a no-op.
2972aa14b1aSNick Terrell * @returns New tableLog
2982aa14b1aSNick Terrell */
HUF_rescaleStats(BYTE * huffWeight,U32 * rankVal,U32 nbSymbols,U32 tableLog,U32 targetTableLog)2992aa14b1aSNick Terrell static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
3002aa14b1aSNick Terrell {
3012aa14b1aSNick Terrell if (tableLog > targetTableLog)
3022aa14b1aSNick Terrell return tableLog;
3032aa14b1aSNick Terrell if (tableLog < targetTableLog) {
3042aa14b1aSNick Terrell U32 const scale = targetTableLog - tableLog;
3052aa14b1aSNick Terrell U32 s;
3062aa14b1aSNick Terrell /* Increase the weight for all non-zero probability symbols by scale. */
3072aa14b1aSNick Terrell for (s = 0; s < nbSymbols; ++s) {
3082aa14b1aSNick Terrell huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
3092aa14b1aSNick Terrell }
3102aa14b1aSNick Terrell /* Update rankVal to reflect the new weights.
3112aa14b1aSNick Terrell * All weights except 0 get moved to weight + scale.
3122aa14b1aSNick Terrell * Weights [1, scale] are empty.
3132aa14b1aSNick Terrell */
3142aa14b1aSNick Terrell for (s = targetTableLog; s > scale; --s) {
3152aa14b1aSNick Terrell rankVal[s] = rankVal[s - scale];
3162aa14b1aSNick Terrell }
3172aa14b1aSNick Terrell for (s = scale; s > 0; --s) {
3182aa14b1aSNick Terrell rankVal[s] = 0;
3192aa14b1aSNick Terrell }
3202aa14b1aSNick Terrell }
3212aa14b1aSNick Terrell return targetTableLog;
3222aa14b1aSNick Terrell }
3232aa14b1aSNick Terrell
324e0c1b49fSNick Terrell typedef struct {
325e0c1b49fSNick Terrell U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
326e0c1b49fSNick Terrell U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
327e0c1b49fSNick Terrell U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
328e0c1b49fSNick Terrell BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
329e0c1b49fSNick Terrell BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
330e0c1b49fSNick Terrell } HUF_ReadDTableX1_Workspace;
331e0c1b49fSNick Terrell
332e0c1b49fSNick Terrell
HUF_readDTableX1_wksp(HUF_DTable * DTable,const void * src,size_t srcSize,void * workSpace,size_t wkspSize)333e0c1b49fSNick Terrell size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
334e0c1b49fSNick Terrell {
335e0c1b49fSNick Terrell return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
336e0c1b49fSNick Terrell }
337e0c1b49fSNick Terrell
HUF_readDTableX1_wksp_bmi2(HUF_DTable * DTable,const void * src,size_t srcSize,void * workSpace,size_t wkspSize,int bmi2)338e0c1b49fSNick Terrell size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
339e0c1b49fSNick Terrell {
340e0c1b49fSNick Terrell U32 tableLog = 0;
341e0c1b49fSNick Terrell U32 nbSymbols = 0;
342e0c1b49fSNick Terrell size_t iSize;
343e0c1b49fSNick Terrell void* const dtPtr = DTable + 1;
344e0c1b49fSNick Terrell HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
345e0c1b49fSNick Terrell HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
346e0c1b49fSNick Terrell
347e0c1b49fSNick Terrell DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
348e0c1b49fSNick Terrell if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
349e0c1b49fSNick Terrell
350e0c1b49fSNick Terrell DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
351e0c1b49fSNick Terrell /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
352e0c1b49fSNick Terrell
353e0c1b49fSNick Terrell iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
354e0c1b49fSNick Terrell if (HUF_isError(iSize)) return iSize;
355e0c1b49fSNick Terrell
3562aa14b1aSNick Terrell
357e0c1b49fSNick Terrell /* Table header */
358e0c1b49fSNick Terrell { DTableDesc dtd = HUF_getDTableDesc(DTable);
3592aa14b1aSNick Terrell U32 const maxTableLog = dtd.maxTableLog + 1;
3602aa14b1aSNick Terrell U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
3612aa14b1aSNick Terrell tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
362e0c1b49fSNick Terrell if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
363e0c1b49fSNick Terrell dtd.tableType = 0;
364e0c1b49fSNick Terrell dtd.tableLog = (BYTE)tableLog;
365e0c1b49fSNick Terrell ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
366e0c1b49fSNick Terrell }
367e0c1b49fSNick Terrell
368e0c1b49fSNick Terrell /* Compute symbols and rankStart given rankVal:
369e0c1b49fSNick Terrell *
370e0c1b49fSNick Terrell * rankVal already contains the number of values of each weight.
371e0c1b49fSNick Terrell *
372e0c1b49fSNick Terrell * symbols contains the symbols ordered by weight. First are the rankVal[0]
373e0c1b49fSNick Terrell * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
374e0c1b49fSNick Terrell * symbols[0] is filled (but unused) to avoid a branch.
375e0c1b49fSNick Terrell *
376e0c1b49fSNick Terrell * rankStart contains the offset where each rank belongs in the DTable.
377e0c1b49fSNick Terrell * rankStart[0] is not filled because there are no entries in the table for
378e0c1b49fSNick Terrell * weight 0.
379e0c1b49fSNick Terrell */
380e0c1b49fSNick Terrell {
381e0c1b49fSNick Terrell int n;
382e0c1b49fSNick Terrell int nextRankStart = 0;
383e0c1b49fSNick Terrell int const unroll = 4;
384e0c1b49fSNick Terrell int const nLimit = (int)nbSymbols - unroll + 1;
385e0c1b49fSNick Terrell for (n=0; n<(int)tableLog+1; n++) {
386e0c1b49fSNick Terrell U32 const curr = nextRankStart;
387e0c1b49fSNick Terrell nextRankStart += wksp->rankVal[n];
388e0c1b49fSNick Terrell wksp->rankStart[n] = curr;
389e0c1b49fSNick Terrell }
390e0c1b49fSNick Terrell for (n=0; n < nLimit; n += unroll) {
391e0c1b49fSNick Terrell int u;
392e0c1b49fSNick Terrell for (u=0; u < unroll; ++u) {
393e0c1b49fSNick Terrell size_t const w = wksp->huffWeight[n+u];
394e0c1b49fSNick Terrell wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
395e0c1b49fSNick Terrell }
396e0c1b49fSNick Terrell }
397e0c1b49fSNick Terrell for (; n < (int)nbSymbols; ++n) {
398e0c1b49fSNick Terrell size_t const w = wksp->huffWeight[n];
399e0c1b49fSNick Terrell wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
400e0c1b49fSNick Terrell }
401e0c1b49fSNick Terrell }
402e0c1b49fSNick Terrell
403e0c1b49fSNick Terrell /* fill DTable
404e0c1b49fSNick Terrell * We fill all entries of each weight in order.
4052aa14b1aSNick Terrell * That way length is a constant for each iteration of the outer loop.
406e0c1b49fSNick Terrell * We can switch based on the length to a different inner loop which is
407e0c1b49fSNick Terrell * optimized for that particular case.
408e0c1b49fSNick Terrell */
409e0c1b49fSNick Terrell {
410e0c1b49fSNick Terrell U32 w;
411e0c1b49fSNick Terrell int symbol=wksp->rankVal[0];
412e0c1b49fSNick Terrell int rankStart=0;
413e0c1b49fSNick Terrell for (w=1; w<tableLog+1; ++w) {
414e0c1b49fSNick Terrell int const symbolCount = wksp->rankVal[w];
415e0c1b49fSNick Terrell int const length = (1 << w) >> 1;
416e0c1b49fSNick Terrell int uStart = rankStart;
417e0c1b49fSNick Terrell BYTE const nbBits = (BYTE)(tableLog + 1 - w);
418e0c1b49fSNick Terrell int s;
419e0c1b49fSNick Terrell int u;
420e0c1b49fSNick Terrell switch (length) {
421e0c1b49fSNick Terrell case 1:
422e0c1b49fSNick Terrell for (s=0; s<symbolCount; ++s) {
423e0c1b49fSNick Terrell HUF_DEltX1 D;
424e0c1b49fSNick Terrell D.byte = wksp->symbols[symbol + s];
425e0c1b49fSNick Terrell D.nbBits = nbBits;
426e0c1b49fSNick Terrell dt[uStart] = D;
427e0c1b49fSNick Terrell uStart += 1;
428e0c1b49fSNick Terrell }
429e0c1b49fSNick Terrell break;
430e0c1b49fSNick Terrell case 2:
431e0c1b49fSNick Terrell for (s=0; s<symbolCount; ++s) {
432e0c1b49fSNick Terrell HUF_DEltX1 D;
433e0c1b49fSNick Terrell D.byte = wksp->symbols[symbol + s];
434e0c1b49fSNick Terrell D.nbBits = nbBits;
435e0c1b49fSNick Terrell dt[uStart+0] = D;
436e0c1b49fSNick Terrell dt[uStart+1] = D;
437e0c1b49fSNick Terrell uStart += 2;
438e0c1b49fSNick Terrell }
439e0c1b49fSNick Terrell break;
440e0c1b49fSNick Terrell case 4:
441e0c1b49fSNick Terrell for (s=0; s<symbolCount; ++s) {
442e0c1b49fSNick Terrell U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
443e0c1b49fSNick Terrell MEM_write64(dt + uStart, D4);
444e0c1b49fSNick Terrell uStart += 4;
445e0c1b49fSNick Terrell }
446e0c1b49fSNick Terrell break;
447e0c1b49fSNick Terrell case 8:
448e0c1b49fSNick Terrell for (s=0; s<symbolCount; ++s) {
449e0c1b49fSNick Terrell U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
450e0c1b49fSNick Terrell MEM_write64(dt + uStart, D4);
451e0c1b49fSNick Terrell MEM_write64(dt + uStart + 4, D4);
452e0c1b49fSNick Terrell uStart += 8;
453e0c1b49fSNick Terrell }
454e0c1b49fSNick Terrell break;
455e0c1b49fSNick Terrell default:
456e0c1b49fSNick Terrell for (s=0; s<symbolCount; ++s) {
457e0c1b49fSNick Terrell U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
458e0c1b49fSNick Terrell for (u=0; u < length; u += 16) {
459e0c1b49fSNick Terrell MEM_write64(dt + uStart + u + 0, D4);
460e0c1b49fSNick Terrell MEM_write64(dt + uStart + u + 4, D4);
461e0c1b49fSNick Terrell MEM_write64(dt + uStart + u + 8, D4);
462e0c1b49fSNick Terrell MEM_write64(dt + uStart + u + 12, D4);
463e0c1b49fSNick Terrell }
464e0c1b49fSNick Terrell assert(u == length);
465e0c1b49fSNick Terrell uStart += length;
466e0c1b49fSNick Terrell }
467e0c1b49fSNick Terrell break;
468e0c1b49fSNick Terrell }
469e0c1b49fSNick Terrell symbol += symbolCount;
470e0c1b49fSNick Terrell rankStart += symbolCount * length;
471e0c1b49fSNick Terrell }
472e0c1b49fSNick Terrell }
473e0c1b49fSNick Terrell return iSize;
474e0c1b49fSNick Terrell }
475e0c1b49fSNick Terrell
476e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t * Dstream,const HUF_DEltX1 * dt,const U32 dtLog)477e0c1b49fSNick Terrell HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
478e0c1b49fSNick Terrell {
479e0c1b49fSNick Terrell size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
480e0c1b49fSNick Terrell BYTE const c = dt[val].byte;
481e0c1b49fSNick Terrell BIT_skipBits(Dstream, dt[val].nbBits);
482e0c1b49fSNick Terrell return c;
483e0c1b49fSNick Terrell }
484e0c1b49fSNick Terrell
485e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
486e0c1b49fSNick Terrell *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
487e0c1b49fSNick Terrell
488e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
489e0c1b49fSNick Terrell if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
490e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
491e0c1b49fSNick Terrell
492e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
493e0c1b49fSNick Terrell if (MEM_64bits()) \
494e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
495e0c1b49fSNick Terrell
496e0c1b49fSNick Terrell HINT_INLINE size_t
HUF_decodeStreamX1(BYTE * p,BIT_DStream_t * const bitDPtr,BYTE * const pEnd,const HUF_DEltX1 * const dt,const U32 dtLog)497e0c1b49fSNick Terrell HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
498e0c1b49fSNick Terrell {
499e0c1b49fSNick Terrell BYTE* const pStart = p;
500e0c1b49fSNick Terrell
501e0c1b49fSNick Terrell /* up to 4 symbols at a time */
5022aa14b1aSNick Terrell if ((pEnd - p) > 3) {
503e0c1b49fSNick Terrell while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
504e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
505e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
506e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
507e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
508e0c1b49fSNick Terrell }
5092aa14b1aSNick Terrell } else {
5102aa14b1aSNick Terrell BIT_reloadDStream(bitDPtr);
5112aa14b1aSNick Terrell }
512e0c1b49fSNick Terrell
513e0c1b49fSNick Terrell /* [0-3] symbols remaining */
514e0c1b49fSNick Terrell if (MEM_32bits())
515e0c1b49fSNick Terrell while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
516e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
517e0c1b49fSNick Terrell
518e0c1b49fSNick Terrell /* no more data to retrieve from bitstream, no need to reload */
519e0c1b49fSNick Terrell while (p < pEnd)
520e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
521e0c1b49fSNick Terrell
522e0c1b49fSNick Terrell return pEnd-pStart;
523e0c1b49fSNick Terrell }
524e0c1b49fSNick Terrell
525e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)526e0c1b49fSNick Terrell HUF_decompress1X1_usingDTable_internal_body(
527e0c1b49fSNick Terrell void* dst, size_t dstSize,
528e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
529e0c1b49fSNick Terrell const HUF_DTable* DTable)
530e0c1b49fSNick Terrell {
531e0c1b49fSNick Terrell BYTE* op = (BYTE*)dst;
532e0c1b49fSNick Terrell BYTE* const oend = op + dstSize;
533e0c1b49fSNick Terrell const void* dtPtr = DTable + 1;
534e0c1b49fSNick Terrell const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
535e0c1b49fSNick Terrell BIT_DStream_t bitD;
536e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable);
537e0c1b49fSNick Terrell U32 const dtLog = dtd.tableLog;
538e0c1b49fSNick Terrell
539e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
540e0c1b49fSNick Terrell
541e0c1b49fSNick Terrell HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
542e0c1b49fSNick Terrell
543e0c1b49fSNick Terrell if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
544e0c1b49fSNick Terrell
545e0c1b49fSNick Terrell return dstSize;
546e0c1b49fSNick Terrell }
547e0c1b49fSNick Terrell
548e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)549e0c1b49fSNick Terrell HUF_decompress4X1_usingDTable_internal_body(
550e0c1b49fSNick Terrell void* dst, size_t dstSize,
551e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
552e0c1b49fSNick Terrell const HUF_DTable* DTable)
553e0c1b49fSNick Terrell {
554e0c1b49fSNick Terrell /* Check */
555e0c1b49fSNick Terrell if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
556e0c1b49fSNick Terrell
557e0c1b49fSNick Terrell { const BYTE* const istart = (const BYTE*) cSrc;
558e0c1b49fSNick Terrell BYTE* const ostart = (BYTE*) dst;
559e0c1b49fSNick Terrell BYTE* const oend = ostart + dstSize;
560e0c1b49fSNick Terrell BYTE* const olimit = oend - 3;
561e0c1b49fSNick Terrell const void* const dtPtr = DTable + 1;
562e0c1b49fSNick Terrell const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
563e0c1b49fSNick Terrell
564e0c1b49fSNick Terrell /* Init */
565e0c1b49fSNick Terrell BIT_DStream_t bitD1;
566e0c1b49fSNick Terrell BIT_DStream_t bitD2;
567e0c1b49fSNick Terrell BIT_DStream_t bitD3;
568e0c1b49fSNick Terrell BIT_DStream_t bitD4;
569e0c1b49fSNick Terrell size_t const length1 = MEM_readLE16(istart);
570e0c1b49fSNick Terrell size_t const length2 = MEM_readLE16(istart+2);
571e0c1b49fSNick Terrell size_t const length3 = MEM_readLE16(istart+4);
572e0c1b49fSNick Terrell size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
573e0c1b49fSNick Terrell const BYTE* const istart1 = istart + 6; /* jumpTable */
574e0c1b49fSNick Terrell const BYTE* const istart2 = istart1 + length1;
575e0c1b49fSNick Terrell const BYTE* const istart3 = istart2 + length2;
576e0c1b49fSNick Terrell const BYTE* const istart4 = istart3 + length3;
577e0c1b49fSNick Terrell const size_t segmentSize = (dstSize+3) / 4;
578e0c1b49fSNick Terrell BYTE* const opStart2 = ostart + segmentSize;
579e0c1b49fSNick Terrell BYTE* const opStart3 = opStart2 + segmentSize;
580e0c1b49fSNick Terrell BYTE* const opStart4 = opStart3 + segmentSize;
581e0c1b49fSNick Terrell BYTE* op1 = ostart;
582e0c1b49fSNick Terrell BYTE* op2 = opStart2;
583e0c1b49fSNick Terrell BYTE* op3 = opStart3;
584e0c1b49fSNick Terrell BYTE* op4 = opStart4;
585e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable);
586e0c1b49fSNick Terrell U32 const dtLog = dtd.tableLog;
587e0c1b49fSNick Terrell U32 endSignal = 1;
588e0c1b49fSNick Terrell
589e0c1b49fSNick Terrell if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
5902aa14b1aSNick Terrell if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
591e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
592e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
593e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
594e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
595e0c1b49fSNick Terrell
596e0c1b49fSNick Terrell /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
5972aa14b1aSNick Terrell if ((size_t)(oend - op4) >= sizeof(size_t)) {
598e0c1b49fSNick Terrell for ( ; (endSignal) & (op4 < olimit) ; ) {
599e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
600e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
601e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
602e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
603e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
604e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
605e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
606e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
607e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
608e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
609e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
610e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
611e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
612e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
613e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
614e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
615e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
616e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
617e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
618e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
619e0c1b49fSNick Terrell }
6202aa14b1aSNick Terrell }
621e0c1b49fSNick Terrell
622e0c1b49fSNick Terrell /* check corruption */
623e0c1b49fSNick Terrell /* note : should not be necessary : op# advance in lock step, and we control op4.
624e0c1b49fSNick Terrell * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
625e0c1b49fSNick Terrell if (op1 > opStart2) return ERROR(corruption_detected);
626e0c1b49fSNick Terrell if (op2 > opStart3) return ERROR(corruption_detected);
627e0c1b49fSNick Terrell if (op3 > opStart4) return ERROR(corruption_detected);
628e0c1b49fSNick Terrell /* note : op4 supposed already verified within main loop */
629e0c1b49fSNick Terrell
630e0c1b49fSNick Terrell /* finish bitStreams one by one */
631e0c1b49fSNick Terrell HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
632e0c1b49fSNick Terrell HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
633e0c1b49fSNick Terrell HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
634e0c1b49fSNick Terrell HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
635e0c1b49fSNick Terrell
636e0c1b49fSNick Terrell /* check */
637e0c1b49fSNick Terrell { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
638e0c1b49fSNick Terrell if (!endCheck) return ERROR(corruption_detected); }
639e0c1b49fSNick Terrell
640e0c1b49fSNick Terrell /* decoded size */
641e0c1b49fSNick Terrell return dstSize;
642e0c1b49fSNick Terrell }
643e0c1b49fSNick Terrell }
644e0c1b49fSNick Terrell
6452aa14b1aSNick Terrell #if HUF_NEED_BMI2_FUNCTION
6462aa14b1aSNick Terrell static BMI2_TARGET_ATTRIBUTE
HUF_decompress4X1_usingDTable_internal_bmi2(void * dst,size_t dstSize,void const * cSrc,size_t cSrcSize,HUF_DTable const * DTable)6472aa14b1aSNick Terrell size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
6482aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable) {
6492aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
6502aa14b1aSNick Terrell }
6512aa14b1aSNick Terrell #endif
6522aa14b1aSNick Terrell
6532aa14b1aSNick Terrell #if HUF_NEED_DEFAULT_FUNCTION
6542aa14b1aSNick Terrell static
HUF_decompress4X1_usingDTable_internal_default(void * dst,size_t dstSize,void const * cSrc,size_t cSrcSize,HUF_DTable const * DTable)6552aa14b1aSNick Terrell size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
6562aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable) {
6572aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
6582aa14b1aSNick Terrell }
6592aa14b1aSNick Terrell #endif
6602aa14b1aSNick Terrell
6612aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2
6622aa14b1aSNick Terrell
6632aa14b1aSNick Terrell HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
6642aa14b1aSNick Terrell
6652aa14b1aSNick Terrell static HUF_ASM_X86_64_BMI2_ATTRS
6662aa14b1aSNick Terrell size_t
HUF_decompress4X1_usingDTable_internal_bmi2_asm(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)6672aa14b1aSNick Terrell HUF_decompress4X1_usingDTable_internal_bmi2_asm(
6682aa14b1aSNick Terrell void* dst, size_t dstSize,
6692aa14b1aSNick Terrell const void* cSrc, size_t cSrcSize,
6702aa14b1aSNick Terrell const HUF_DTable* DTable)
6712aa14b1aSNick Terrell {
6722aa14b1aSNick Terrell void const* dt = DTable + 1;
6732aa14b1aSNick Terrell const BYTE* const iend = (const BYTE*)cSrc + 6;
6742aa14b1aSNick Terrell BYTE* const oend = (BYTE*)dst + dstSize;
6752aa14b1aSNick Terrell HUF_DecompressAsmArgs args;
6762aa14b1aSNick Terrell {
6772aa14b1aSNick Terrell size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
6782aa14b1aSNick Terrell FORWARD_IF_ERROR(ret, "Failed to init asm args");
6792aa14b1aSNick Terrell if (ret != 0)
6802aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
6812aa14b1aSNick Terrell }
6822aa14b1aSNick Terrell
6832aa14b1aSNick Terrell assert(args.ip[0] >= args.ilimit);
6842aa14b1aSNick Terrell HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);
6852aa14b1aSNick Terrell
6862aa14b1aSNick Terrell /* Our loop guarantees that ip[] >= ilimit and that we haven't
6872aa14b1aSNick Terrell * overwritten any op[].
6882aa14b1aSNick Terrell */
6892aa14b1aSNick Terrell assert(args.ip[0] >= iend);
6902aa14b1aSNick Terrell assert(args.ip[1] >= iend);
6912aa14b1aSNick Terrell assert(args.ip[2] >= iend);
6922aa14b1aSNick Terrell assert(args.ip[3] >= iend);
6932aa14b1aSNick Terrell assert(args.op[3] <= oend);
6942aa14b1aSNick Terrell (void)iend;
6952aa14b1aSNick Terrell
6962aa14b1aSNick Terrell /* finish bit streams one by one. */
6972aa14b1aSNick Terrell {
6982aa14b1aSNick Terrell size_t const segmentSize = (dstSize+3) / 4;
6992aa14b1aSNick Terrell BYTE* segmentEnd = (BYTE*)dst;
7002aa14b1aSNick Terrell int i;
7012aa14b1aSNick Terrell for (i = 0; i < 4; ++i) {
7022aa14b1aSNick Terrell BIT_DStream_t bit;
7032aa14b1aSNick Terrell if (segmentSize <= (size_t)(oend - segmentEnd))
7042aa14b1aSNick Terrell segmentEnd += segmentSize;
7052aa14b1aSNick Terrell else
7062aa14b1aSNick Terrell segmentEnd = oend;
7072aa14b1aSNick Terrell FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
7082aa14b1aSNick Terrell /* Decompress and validate that we've produced exactly the expected length. */
7092aa14b1aSNick Terrell args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
7102aa14b1aSNick Terrell if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
7112aa14b1aSNick Terrell }
7122aa14b1aSNick Terrell }
7132aa14b1aSNick Terrell
7142aa14b1aSNick Terrell /* decoded size */
7152aa14b1aSNick Terrell return dstSize;
7162aa14b1aSNick Terrell }
7172aa14b1aSNick Terrell #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
718e0c1b49fSNick Terrell
719e0c1b49fSNick Terrell typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
720e0c1b49fSNick Terrell const void *cSrc,
721e0c1b49fSNick Terrell size_t cSrcSize,
722e0c1b49fSNick Terrell const HUF_DTable *DTable);
723e0c1b49fSNick Terrell
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)724e0c1b49fSNick Terrell HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
725e0c1b49fSNick Terrell
7262aa14b1aSNick Terrell static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
7272aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
7282aa14b1aSNick Terrell {
7292aa14b1aSNick Terrell #if DYNAMIC_BMI2
7302aa14b1aSNick Terrell if (bmi2) {
7312aa14b1aSNick Terrell # if ZSTD_ENABLE_ASM_X86_64_BMI2
7322aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
7332aa14b1aSNick Terrell # else
7342aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
7352aa14b1aSNick Terrell # endif
7362aa14b1aSNick Terrell }
7372aa14b1aSNick Terrell #else
7382aa14b1aSNick Terrell (void)bmi2;
7392aa14b1aSNick Terrell #endif
7402aa14b1aSNick Terrell
7412aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
7422aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
7432aa14b1aSNick Terrell #else
7442aa14b1aSNick Terrell return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
7452aa14b1aSNick Terrell #endif
7462aa14b1aSNick Terrell }
747e0c1b49fSNick Terrell
748e0c1b49fSNick Terrell
HUF_decompress1X1_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)749e0c1b49fSNick Terrell size_t HUF_decompress1X1_usingDTable(
750e0c1b49fSNick Terrell void* dst, size_t dstSize,
751e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
752e0c1b49fSNick Terrell const HUF_DTable* DTable)
753e0c1b49fSNick Terrell {
754e0c1b49fSNick Terrell DTableDesc dtd = HUF_getDTableDesc(DTable);
755e0c1b49fSNick Terrell if (dtd.tableType != 0) return ERROR(GENERIC);
756e0c1b49fSNick Terrell return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
757e0c1b49fSNick Terrell }
758e0c1b49fSNick Terrell
HUF_decompress1X1_DCtx_wksp(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)759e0c1b49fSNick Terrell size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
760e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
761e0c1b49fSNick Terrell void* workSpace, size_t wkspSize)
762e0c1b49fSNick Terrell {
763e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*) cSrc;
764e0c1b49fSNick Terrell
765e0c1b49fSNick Terrell size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
766e0c1b49fSNick Terrell if (HUF_isError(hSize)) return hSize;
767e0c1b49fSNick Terrell if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
768e0c1b49fSNick Terrell ip += hSize; cSrcSize -= hSize;
769e0c1b49fSNick Terrell
770e0c1b49fSNick Terrell return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
771e0c1b49fSNick Terrell }
772e0c1b49fSNick Terrell
773e0c1b49fSNick Terrell
HUF_decompress4X1_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)774e0c1b49fSNick Terrell size_t HUF_decompress4X1_usingDTable(
775e0c1b49fSNick Terrell void* dst, size_t dstSize,
776e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
777e0c1b49fSNick Terrell const HUF_DTable* DTable)
778e0c1b49fSNick Terrell {
779e0c1b49fSNick Terrell DTableDesc dtd = HUF_getDTableDesc(DTable);
780e0c1b49fSNick Terrell if (dtd.tableType != 0) return ERROR(GENERIC);
781e0c1b49fSNick Terrell return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
782e0c1b49fSNick Terrell }
783e0c1b49fSNick Terrell
HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)784e0c1b49fSNick Terrell static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
785e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
786e0c1b49fSNick Terrell void* workSpace, size_t wkspSize, int bmi2)
787e0c1b49fSNick Terrell {
788e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*) cSrc;
789e0c1b49fSNick Terrell
790e0c1b49fSNick Terrell size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
791e0c1b49fSNick Terrell if (HUF_isError(hSize)) return hSize;
792e0c1b49fSNick Terrell if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
793e0c1b49fSNick Terrell ip += hSize; cSrcSize -= hSize;
794e0c1b49fSNick Terrell
795e0c1b49fSNick Terrell return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
796e0c1b49fSNick Terrell }
797e0c1b49fSNick Terrell
HUF_decompress4X1_DCtx_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)798e0c1b49fSNick Terrell size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
799e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
800e0c1b49fSNick Terrell void* workSpace, size_t wkspSize)
801e0c1b49fSNick Terrell {
802e0c1b49fSNick Terrell return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
803e0c1b49fSNick Terrell }
804e0c1b49fSNick Terrell
805e0c1b49fSNick Terrell
806e0c1b49fSNick Terrell #endif /* HUF_FORCE_DECOMPRESS_X2 */
807e0c1b49fSNick Terrell
808e0c1b49fSNick Terrell
809e0c1b49fSNick Terrell #ifndef HUF_FORCE_DECOMPRESS_X1
810e0c1b49fSNick Terrell
811e0c1b49fSNick Terrell /* *************************/
812e0c1b49fSNick Terrell /* double-symbols decoding */
813e0c1b49fSNick Terrell /* *************************/
814e0c1b49fSNick Terrell
815e0c1b49fSNick Terrell typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
8162aa14b1aSNick Terrell typedef struct { BYTE symbol; } sortedSymbol_t;
817e0c1b49fSNick Terrell typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
818e0c1b49fSNick Terrell typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
819e0c1b49fSNick Terrell
8202aa14b1aSNick Terrell /*
8212aa14b1aSNick Terrell * Constructs a HUF_DEltX2 in a U32.
8222aa14b1aSNick Terrell */
HUF_buildDEltX2U32(U32 symbol,U32 nbBits,U32 baseSeq,int level)8232aa14b1aSNick Terrell static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
8242aa14b1aSNick Terrell {
8252aa14b1aSNick Terrell U32 seq;
8262aa14b1aSNick Terrell DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
8272aa14b1aSNick Terrell DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
8282aa14b1aSNick Terrell DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
8292aa14b1aSNick Terrell DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
8302aa14b1aSNick Terrell if (MEM_isLittleEndian()) {
8312aa14b1aSNick Terrell seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
8322aa14b1aSNick Terrell return seq + (nbBits << 16) + ((U32)level << 24);
8332aa14b1aSNick Terrell } else {
8342aa14b1aSNick Terrell seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
8352aa14b1aSNick Terrell return (seq << 16) + (nbBits << 8) + (U32)level;
8362aa14b1aSNick Terrell }
8372aa14b1aSNick Terrell }
8382aa14b1aSNick Terrell
8392aa14b1aSNick Terrell /*
8402aa14b1aSNick Terrell * Constructs a HUF_DEltX2.
8412aa14b1aSNick Terrell */
HUF_buildDEltX2(U32 symbol,U32 nbBits,U32 baseSeq,int level)8422aa14b1aSNick Terrell static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
8432aa14b1aSNick Terrell {
8442aa14b1aSNick Terrell HUF_DEltX2 DElt;
8452aa14b1aSNick Terrell U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
8462aa14b1aSNick Terrell DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
8472aa14b1aSNick Terrell ZSTD_memcpy(&DElt, &val, sizeof(val));
8482aa14b1aSNick Terrell return DElt;
8492aa14b1aSNick Terrell }
8502aa14b1aSNick Terrell
8512aa14b1aSNick Terrell /*
8522aa14b1aSNick Terrell * Constructs 2 HUF_DEltX2s and packs them into a U64.
8532aa14b1aSNick Terrell */
HUF_buildDEltX2U64(U32 symbol,U32 nbBits,U16 baseSeq,int level)8542aa14b1aSNick Terrell static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
8552aa14b1aSNick Terrell {
8562aa14b1aSNick Terrell U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
8572aa14b1aSNick Terrell return (U64)DElt + ((U64)DElt << 32);
8582aa14b1aSNick Terrell }
8592aa14b1aSNick Terrell
8602aa14b1aSNick Terrell /*
8612aa14b1aSNick Terrell * Fills the DTable rank with all the symbols from [begin, end) that are each
8622aa14b1aSNick Terrell * nbBits long.
8632aa14b1aSNick Terrell *
8642aa14b1aSNick Terrell * @param DTableRank The start of the rank in the DTable.
8652aa14b1aSNick Terrell * @param begin The first symbol to fill (inclusive).
8662aa14b1aSNick Terrell * @param end The last symbol to fill (exclusive).
8672aa14b1aSNick Terrell * @param nbBits Each symbol is nbBits long.
8682aa14b1aSNick Terrell * @param tableLog The table log.
8692aa14b1aSNick Terrell * @param baseSeq If level == 1 { 0 } else { the first level symbol }
8702aa14b1aSNick Terrell * @param level The level in the table. Must be 1 or 2.
8712aa14b1aSNick Terrell */
HUF_fillDTableX2ForWeight(HUF_DEltX2 * DTableRank,sortedSymbol_t const * begin,sortedSymbol_t const * end,U32 nbBits,U32 tableLog,U16 baseSeq,int const level)8722aa14b1aSNick Terrell static void HUF_fillDTableX2ForWeight(
8732aa14b1aSNick Terrell HUF_DEltX2* DTableRank,
8742aa14b1aSNick Terrell sortedSymbol_t const* begin, sortedSymbol_t const* end,
8752aa14b1aSNick Terrell U32 nbBits, U32 tableLog,
8762aa14b1aSNick Terrell U16 baseSeq, int const level)
8772aa14b1aSNick Terrell {
8782aa14b1aSNick Terrell U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
8792aa14b1aSNick Terrell const sortedSymbol_t* ptr;
8802aa14b1aSNick Terrell assert(level >= 1 && level <= 2);
8812aa14b1aSNick Terrell switch (length) {
8822aa14b1aSNick Terrell case 1:
8832aa14b1aSNick Terrell for (ptr = begin; ptr != end; ++ptr) {
8842aa14b1aSNick Terrell HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
8852aa14b1aSNick Terrell *DTableRank++ = DElt;
8862aa14b1aSNick Terrell }
8872aa14b1aSNick Terrell break;
8882aa14b1aSNick Terrell case 2:
8892aa14b1aSNick Terrell for (ptr = begin; ptr != end; ++ptr) {
8902aa14b1aSNick Terrell HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
8912aa14b1aSNick Terrell DTableRank[0] = DElt;
8922aa14b1aSNick Terrell DTableRank[1] = DElt;
8932aa14b1aSNick Terrell DTableRank += 2;
8942aa14b1aSNick Terrell }
8952aa14b1aSNick Terrell break;
8962aa14b1aSNick Terrell case 4:
8972aa14b1aSNick Terrell for (ptr = begin; ptr != end; ++ptr) {
8982aa14b1aSNick Terrell U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
8992aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
9002aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
9012aa14b1aSNick Terrell DTableRank += 4;
9022aa14b1aSNick Terrell }
9032aa14b1aSNick Terrell break;
9042aa14b1aSNick Terrell case 8:
9052aa14b1aSNick Terrell for (ptr = begin; ptr != end; ++ptr) {
9062aa14b1aSNick Terrell U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
9072aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
9082aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
9092aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
9102aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
9112aa14b1aSNick Terrell DTableRank += 8;
9122aa14b1aSNick Terrell }
9132aa14b1aSNick Terrell break;
9142aa14b1aSNick Terrell default:
9152aa14b1aSNick Terrell for (ptr = begin; ptr != end; ++ptr) {
9162aa14b1aSNick Terrell U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
9172aa14b1aSNick Terrell HUF_DEltX2* const DTableRankEnd = DTableRank + length;
9182aa14b1aSNick Terrell for (; DTableRank != DTableRankEnd; DTableRank += 8) {
9192aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
9202aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
9212aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
9222aa14b1aSNick Terrell ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
9232aa14b1aSNick Terrell }
9242aa14b1aSNick Terrell }
9252aa14b1aSNick Terrell break;
9262aa14b1aSNick Terrell }
9272aa14b1aSNick Terrell }
928e0c1b49fSNick Terrell
929e0c1b49fSNick Terrell /* HUF_fillDTableX2Level2() :
930e0c1b49fSNick Terrell * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
HUF_fillDTableX2Level2(HUF_DEltX2 * DTable,U32 targetLog,const U32 consumedBits,const U32 * rankVal,const int minWeight,const int maxWeight1,const sortedSymbol_t * sortedSymbols,U32 const * rankStart,U32 nbBitsBaseline,U16 baseSeq)9312aa14b1aSNick Terrell static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
9322aa14b1aSNick Terrell const U32* rankVal, const int minWeight, const int maxWeight1,
9332aa14b1aSNick Terrell const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
9342aa14b1aSNick Terrell U32 nbBitsBaseline, U16 baseSeq)
935e0c1b49fSNick Terrell {
9362aa14b1aSNick Terrell /* Fill skipped values (all positions up to rankVal[minWeight]).
9372aa14b1aSNick Terrell * These are positions only get a single symbol because the combined weight
9382aa14b1aSNick Terrell * is too large.
9392aa14b1aSNick Terrell */
940e0c1b49fSNick Terrell if (minWeight>1) {
9412aa14b1aSNick Terrell U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
9422aa14b1aSNick Terrell U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
9432aa14b1aSNick Terrell int const skipSize = rankVal[minWeight];
9442aa14b1aSNick Terrell assert(length > 1);
9452aa14b1aSNick Terrell assert((U32)skipSize < length);
9462aa14b1aSNick Terrell switch (length) {
9472aa14b1aSNick Terrell case 2:
9482aa14b1aSNick Terrell assert(skipSize == 1);
9492aa14b1aSNick Terrell ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
9502aa14b1aSNick Terrell break;
9512aa14b1aSNick Terrell case 4:
9522aa14b1aSNick Terrell assert(skipSize <= 4);
9532aa14b1aSNick Terrell ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
9542aa14b1aSNick Terrell ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
9552aa14b1aSNick Terrell break;
9562aa14b1aSNick Terrell default:
9572aa14b1aSNick Terrell {
9582aa14b1aSNick Terrell int i;
9592aa14b1aSNick Terrell for (i = 0; i < skipSize; i += 8) {
9602aa14b1aSNick Terrell ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
9612aa14b1aSNick Terrell ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
9622aa14b1aSNick Terrell ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
9632aa14b1aSNick Terrell ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
9642aa14b1aSNick Terrell }
9652aa14b1aSNick Terrell }
9662aa14b1aSNick Terrell }
967e0c1b49fSNick Terrell }
968e0c1b49fSNick Terrell
9692aa14b1aSNick Terrell /* Fill each of the second level symbols by weight. */
9702aa14b1aSNick Terrell {
9712aa14b1aSNick Terrell int w;
9722aa14b1aSNick Terrell for (w = minWeight; w < maxWeight1; ++w) {
9732aa14b1aSNick Terrell int const begin = rankStart[w];
9742aa14b1aSNick Terrell int const end = rankStart[w+1];
9752aa14b1aSNick Terrell U32 const nbBits = nbBitsBaseline - w;
9762aa14b1aSNick Terrell U32 const totalBits = nbBits + consumedBits;
9772aa14b1aSNick Terrell HUF_fillDTableX2ForWeight(
9782aa14b1aSNick Terrell DTable + rankVal[w],
9792aa14b1aSNick Terrell sortedSymbols + begin, sortedSymbols + end,
9802aa14b1aSNick Terrell totalBits, targetLog,
9812aa14b1aSNick Terrell baseSeq, /* level */ 2);
982e0c1b49fSNick Terrell }
9832aa14b1aSNick Terrell }
9842aa14b1aSNick Terrell }
985e0c1b49fSNick Terrell
HUF_fillDTableX2(HUF_DEltX2 * DTable,const U32 targetLog,const sortedSymbol_t * sortedList,const U32 * rankStart,rankValCol_t * rankValOrigin,const U32 maxWeight,const U32 nbBitsBaseline)986e0c1b49fSNick Terrell static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
9872aa14b1aSNick Terrell const sortedSymbol_t* sortedList,
988*780f6a9aSKees Cook const U32* rankStart, rankValCol_t *rankValOrigin, const U32 maxWeight,
9892aa14b1aSNick Terrell const U32 nbBitsBaseline)
990e0c1b49fSNick Terrell {
9912aa14b1aSNick Terrell U32* const rankVal = rankValOrigin[0];
992e0c1b49fSNick Terrell const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
993e0c1b49fSNick Terrell const U32 minBits = nbBitsBaseline - maxWeight;
9942aa14b1aSNick Terrell int w;
9952aa14b1aSNick Terrell int const wEnd = (int)maxWeight + 1;
996e0c1b49fSNick Terrell
9972aa14b1aSNick Terrell /* Fill DTable in order of weight. */
9982aa14b1aSNick Terrell for (w = 1; w < wEnd; ++w) {
9992aa14b1aSNick Terrell int const begin = (int)rankStart[w];
10002aa14b1aSNick Terrell int const end = (int)rankStart[w+1];
10012aa14b1aSNick Terrell U32 const nbBits = nbBitsBaseline - w;
1002e0c1b49fSNick Terrell
10032aa14b1aSNick Terrell if (targetLog-nbBits >= minBits) {
10042aa14b1aSNick Terrell /* Enough room for a second symbol. */
10052aa14b1aSNick Terrell int start = rankVal[w];
10062aa14b1aSNick Terrell U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
1007e0c1b49fSNick Terrell int minWeight = nbBits + scaleLog;
10082aa14b1aSNick Terrell int s;
1009e0c1b49fSNick Terrell if (minWeight < 1) minWeight = 1;
10102aa14b1aSNick Terrell /* Fill the DTable for every symbol of weight w.
10112aa14b1aSNick Terrell * These symbols get at least 1 second symbol.
10122aa14b1aSNick Terrell */
10132aa14b1aSNick Terrell for (s = begin; s != end; ++s) {
10142aa14b1aSNick Terrell HUF_fillDTableX2Level2(
10152aa14b1aSNick Terrell DTable + start, targetLog, nbBits,
10162aa14b1aSNick Terrell rankValOrigin[nbBits], minWeight, wEnd,
10172aa14b1aSNick Terrell sortedList, rankStart,
10182aa14b1aSNick Terrell nbBitsBaseline, sortedList[s].symbol);
10192aa14b1aSNick Terrell start += length;
10202aa14b1aSNick Terrell }
1021e0c1b49fSNick Terrell } else {
10222aa14b1aSNick Terrell /* Only a single symbol. */
10232aa14b1aSNick Terrell HUF_fillDTableX2ForWeight(
10242aa14b1aSNick Terrell DTable + rankVal[w],
10252aa14b1aSNick Terrell sortedList + begin, sortedList + end,
10262aa14b1aSNick Terrell nbBits, targetLog,
10272aa14b1aSNick Terrell /* baseSeq */ 0, /* level */ 1);
10282aa14b1aSNick Terrell }
1029e0c1b49fSNick Terrell }
1030e0c1b49fSNick Terrell }
1031e0c1b49fSNick Terrell
1032e0c1b49fSNick Terrell typedef struct {
1033e0c1b49fSNick Terrell rankValCol_t rankVal[HUF_TABLELOG_MAX];
1034e0c1b49fSNick Terrell U32 rankStats[HUF_TABLELOG_MAX + 1];
10352aa14b1aSNick Terrell U32 rankStart0[HUF_TABLELOG_MAX + 3];
1036e0c1b49fSNick Terrell sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
1037e0c1b49fSNick Terrell BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
1038e0c1b49fSNick Terrell U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
1039e0c1b49fSNick Terrell } HUF_ReadDTableX2_Workspace;
1040e0c1b49fSNick Terrell
HUF_readDTableX2_wksp(HUF_DTable * DTable,const void * src,size_t srcSize,void * workSpace,size_t wkspSize)1041e0c1b49fSNick Terrell size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
1042e0c1b49fSNick Terrell const void* src, size_t srcSize,
1043e0c1b49fSNick Terrell void* workSpace, size_t wkspSize)
1044e0c1b49fSNick Terrell {
10452aa14b1aSNick Terrell return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
10462aa14b1aSNick Terrell }
10472aa14b1aSNick Terrell
HUF_readDTableX2_wksp_bmi2(HUF_DTable * DTable,const void * src,size_t srcSize,void * workSpace,size_t wkspSize,int bmi2)10482aa14b1aSNick Terrell size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
10492aa14b1aSNick Terrell const void* src, size_t srcSize,
10502aa14b1aSNick Terrell void* workSpace, size_t wkspSize, int bmi2)
10512aa14b1aSNick Terrell {
10522aa14b1aSNick Terrell U32 tableLog, maxW, nbSymbols;
1053e0c1b49fSNick Terrell DTableDesc dtd = HUF_getDTableDesc(DTable);
10542aa14b1aSNick Terrell U32 maxTableLog = dtd.maxTableLog;
1055e0c1b49fSNick Terrell size_t iSize;
1056e0c1b49fSNick Terrell void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
1057e0c1b49fSNick Terrell HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
1058e0c1b49fSNick Terrell U32 *rankStart;
1059e0c1b49fSNick Terrell
1060e0c1b49fSNick Terrell HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
1061e0c1b49fSNick Terrell
1062e0c1b49fSNick Terrell if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
1063e0c1b49fSNick Terrell
1064e0c1b49fSNick Terrell rankStart = wksp->rankStart0 + 1;
1065e0c1b49fSNick Terrell ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
1066e0c1b49fSNick Terrell ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
1067e0c1b49fSNick Terrell
1068e0c1b49fSNick Terrell DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
1069e0c1b49fSNick Terrell if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
1070e0c1b49fSNick Terrell /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
1071e0c1b49fSNick Terrell
10722aa14b1aSNick Terrell iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
1073e0c1b49fSNick Terrell if (HUF_isError(iSize)) return iSize;
1074e0c1b49fSNick Terrell
1075e0c1b49fSNick Terrell /* check result */
1076e0c1b49fSNick Terrell if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
10772aa14b1aSNick Terrell if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
1078e0c1b49fSNick Terrell
1079e0c1b49fSNick Terrell /* find maxWeight */
1080e0c1b49fSNick Terrell for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
1081e0c1b49fSNick Terrell
1082e0c1b49fSNick Terrell /* Get start index of each weight */
1083e0c1b49fSNick Terrell { U32 w, nextRankStart = 0;
1084e0c1b49fSNick Terrell for (w=1; w<maxW+1; w++) {
1085e0c1b49fSNick Terrell U32 curr = nextRankStart;
1086e0c1b49fSNick Terrell nextRankStart += wksp->rankStats[w];
1087e0c1b49fSNick Terrell rankStart[w] = curr;
1088e0c1b49fSNick Terrell }
1089e0c1b49fSNick Terrell rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
10902aa14b1aSNick Terrell rankStart[maxW+1] = nextRankStart;
1091e0c1b49fSNick Terrell }
1092e0c1b49fSNick Terrell
1093e0c1b49fSNick Terrell /* sort symbols by weight */
1094e0c1b49fSNick Terrell { U32 s;
1095e0c1b49fSNick Terrell for (s=0; s<nbSymbols; s++) {
1096e0c1b49fSNick Terrell U32 const w = wksp->weightList[s];
1097e0c1b49fSNick Terrell U32 const r = rankStart[w]++;
1098e0c1b49fSNick Terrell wksp->sortedSymbol[r].symbol = (BYTE)s;
1099e0c1b49fSNick Terrell }
1100e0c1b49fSNick Terrell rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
1101e0c1b49fSNick Terrell }
1102e0c1b49fSNick Terrell
1103e0c1b49fSNick Terrell /* Build rankVal */
1104e0c1b49fSNick Terrell { U32* const rankVal0 = wksp->rankVal[0];
1105e0c1b49fSNick Terrell { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
1106e0c1b49fSNick Terrell U32 nextRankVal = 0;
1107e0c1b49fSNick Terrell U32 w;
1108e0c1b49fSNick Terrell for (w=1; w<maxW+1; w++) {
1109e0c1b49fSNick Terrell U32 curr = nextRankVal;
1110e0c1b49fSNick Terrell nextRankVal += wksp->rankStats[w] << (w+rescale);
1111e0c1b49fSNick Terrell rankVal0[w] = curr;
1112e0c1b49fSNick Terrell } }
1113e0c1b49fSNick Terrell { U32 const minBits = tableLog+1 - maxW;
1114e0c1b49fSNick Terrell U32 consumed;
1115e0c1b49fSNick Terrell for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
1116e0c1b49fSNick Terrell U32* const rankValPtr = wksp->rankVal[consumed];
1117e0c1b49fSNick Terrell U32 w;
1118e0c1b49fSNick Terrell for (w = 1; w < maxW+1; w++) {
1119e0c1b49fSNick Terrell rankValPtr[w] = rankVal0[w] >> consumed;
1120e0c1b49fSNick Terrell } } } }
1121e0c1b49fSNick Terrell
1122e0c1b49fSNick Terrell HUF_fillDTableX2(dt, maxTableLog,
11232aa14b1aSNick Terrell wksp->sortedSymbol,
1124e0c1b49fSNick Terrell wksp->rankStart0, wksp->rankVal, maxW,
11252aa14b1aSNick Terrell tableLog+1);
1126e0c1b49fSNick Terrell
1127e0c1b49fSNick Terrell dtd.tableLog = (BYTE)maxTableLog;
1128e0c1b49fSNick Terrell dtd.tableType = 1;
1129e0c1b49fSNick Terrell ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
1130e0c1b49fSNick Terrell return iSize;
1131e0c1b49fSNick Terrell }
1132e0c1b49fSNick Terrell
1133e0c1b49fSNick Terrell
1134e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)1135e0c1b49fSNick Terrell HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
1136e0c1b49fSNick Terrell {
1137e0c1b49fSNick Terrell size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
11382aa14b1aSNick Terrell ZSTD_memcpy(op, &dt[val].sequence, 2);
1139e0c1b49fSNick Terrell BIT_skipBits(DStream, dt[val].nbBits);
1140e0c1b49fSNick Terrell return dt[val].length;
1141e0c1b49fSNick Terrell }
1142e0c1b49fSNick Terrell
1143e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)1144e0c1b49fSNick Terrell HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
1145e0c1b49fSNick Terrell {
1146e0c1b49fSNick Terrell size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
11472aa14b1aSNick Terrell ZSTD_memcpy(op, &dt[val].sequence, 1);
11482aa14b1aSNick Terrell if (dt[val].length==1) {
11492aa14b1aSNick Terrell BIT_skipBits(DStream, dt[val].nbBits);
11502aa14b1aSNick Terrell } else {
1151e0c1b49fSNick Terrell if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
1152e0c1b49fSNick Terrell BIT_skipBits(DStream, dt[val].nbBits);
1153e0c1b49fSNick Terrell if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
1154e0c1b49fSNick Terrell /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
1155e0c1b49fSNick Terrell DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
11562aa14b1aSNick Terrell }
11572aa14b1aSNick Terrell }
1158e0c1b49fSNick Terrell return 1;
1159e0c1b49fSNick Terrell }
1160e0c1b49fSNick Terrell
1161e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
1162e0c1b49fSNick Terrell ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
1163e0c1b49fSNick Terrell
1164e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
1165e0c1b49fSNick Terrell if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
1166e0c1b49fSNick Terrell ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
1167e0c1b49fSNick Terrell
1168e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
1169e0c1b49fSNick Terrell if (MEM_64bits()) \
1170e0c1b49fSNick Terrell ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
1171e0c1b49fSNick Terrell
1172e0c1b49fSNick Terrell HINT_INLINE size_t
HUF_decodeStreamX2(BYTE * p,BIT_DStream_t * bitDPtr,BYTE * const pEnd,const HUF_DEltX2 * const dt,const U32 dtLog)1173e0c1b49fSNick Terrell HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
1174e0c1b49fSNick Terrell const HUF_DEltX2* const dt, const U32 dtLog)
1175e0c1b49fSNick Terrell {
1176e0c1b49fSNick Terrell BYTE* const pStart = p;
1177e0c1b49fSNick Terrell
1178e0c1b49fSNick Terrell /* up to 8 symbols at a time */
11792aa14b1aSNick Terrell if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
11802aa14b1aSNick Terrell if (dtLog <= 11 && MEM_64bits()) {
11812aa14b1aSNick Terrell /* up to 10 symbols at a time */
11822aa14b1aSNick Terrell while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
11832aa14b1aSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
11842aa14b1aSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
11852aa14b1aSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
11862aa14b1aSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
11872aa14b1aSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
11882aa14b1aSNick Terrell }
11892aa14b1aSNick Terrell } else {
11902aa14b1aSNick Terrell /* up to 8 symbols at a time */
1191e0c1b49fSNick Terrell while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
1192e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
1193e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
1194e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
1195e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1196e0c1b49fSNick Terrell }
11972aa14b1aSNick Terrell }
11982aa14b1aSNick Terrell } else {
11992aa14b1aSNick Terrell BIT_reloadDStream(bitDPtr);
12002aa14b1aSNick Terrell }
1201e0c1b49fSNick Terrell
1202e0c1b49fSNick Terrell /* closer to end : up to 2 symbols at a time */
12032aa14b1aSNick Terrell if ((size_t)(pEnd - p) >= 2) {
1204e0c1b49fSNick Terrell while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
1205e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1206e0c1b49fSNick Terrell
1207e0c1b49fSNick Terrell while (p <= pEnd-2)
1208e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
12092aa14b1aSNick Terrell }
1210e0c1b49fSNick Terrell
1211e0c1b49fSNick Terrell if (p < pEnd)
1212e0c1b49fSNick Terrell p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
1213e0c1b49fSNick Terrell
1214e0c1b49fSNick Terrell return p-pStart;
1215e0c1b49fSNick Terrell }
1216e0c1b49fSNick Terrell
1217e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)1218e0c1b49fSNick Terrell HUF_decompress1X2_usingDTable_internal_body(
1219e0c1b49fSNick Terrell void* dst, size_t dstSize,
1220e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
1221e0c1b49fSNick Terrell const HUF_DTable* DTable)
1222e0c1b49fSNick Terrell {
1223e0c1b49fSNick Terrell BIT_DStream_t bitD;
1224e0c1b49fSNick Terrell
1225e0c1b49fSNick Terrell /* Init */
1226e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
1227e0c1b49fSNick Terrell
1228e0c1b49fSNick Terrell /* decode */
1229e0c1b49fSNick Terrell { BYTE* const ostart = (BYTE*) dst;
1230e0c1b49fSNick Terrell BYTE* const oend = ostart + dstSize;
1231e0c1b49fSNick Terrell const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
1232e0c1b49fSNick Terrell const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
1233e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable);
1234e0c1b49fSNick Terrell HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
1235e0c1b49fSNick Terrell }
1236e0c1b49fSNick Terrell
1237e0c1b49fSNick Terrell /* check */
1238e0c1b49fSNick Terrell if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
1239e0c1b49fSNick Terrell
1240e0c1b49fSNick Terrell /* decoded size */
1241e0c1b49fSNick Terrell return dstSize;
1242e0c1b49fSNick Terrell }
1243e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)1244e0c1b49fSNick Terrell HUF_decompress4X2_usingDTable_internal_body(
1245e0c1b49fSNick Terrell void* dst, size_t dstSize,
1246e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
1247e0c1b49fSNick Terrell const HUF_DTable* DTable)
1248e0c1b49fSNick Terrell {
1249e0c1b49fSNick Terrell if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
1250e0c1b49fSNick Terrell
1251e0c1b49fSNick Terrell { const BYTE* const istart = (const BYTE*) cSrc;
1252e0c1b49fSNick Terrell BYTE* const ostart = (BYTE*) dst;
1253e0c1b49fSNick Terrell BYTE* const oend = ostart + dstSize;
1254e0c1b49fSNick Terrell BYTE* const olimit = oend - (sizeof(size_t)-1);
1255e0c1b49fSNick Terrell const void* const dtPtr = DTable+1;
1256e0c1b49fSNick Terrell const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
1257e0c1b49fSNick Terrell
1258e0c1b49fSNick Terrell /* Init */
1259e0c1b49fSNick Terrell BIT_DStream_t bitD1;
1260e0c1b49fSNick Terrell BIT_DStream_t bitD2;
1261e0c1b49fSNick Terrell BIT_DStream_t bitD3;
1262e0c1b49fSNick Terrell BIT_DStream_t bitD4;
1263e0c1b49fSNick Terrell size_t const length1 = MEM_readLE16(istart);
1264e0c1b49fSNick Terrell size_t const length2 = MEM_readLE16(istart+2);
1265e0c1b49fSNick Terrell size_t const length3 = MEM_readLE16(istart+4);
1266e0c1b49fSNick Terrell size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
1267e0c1b49fSNick Terrell const BYTE* const istart1 = istart + 6; /* jumpTable */
1268e0c1b49fSNick Terrell const BYTE* const istart2 = istart1 + length1;
1269e0c1b49fSNick Terrell const BYTE* const istart3 = istart2 + length2;
1270e0c1b49fSNick Terrell const BYTE* const istart4 = istart3 + length3;
1271e0c1b49fSNick Terrell size_t const segmentSize = (dstSize+3) / 4;
1272e0c1b49fSNick Terrell BYTE* const opStart2 = ostart + segmentSize;
1273e0c1b49fSNick Terrell BYTE* const opStart3 = opStart2 + segmentSize;
1274e0c1b49fSNick Terrell BYTE* const opStart4 = opStart3 + segmentSize;
1275e0c1b49fSNick Terrell BYTE* op1 = ostart;
1276e0c1b49fSNick Terrell BYTE* op2 = opStart2;
1277e0c1b49fSNick Terrell BYTE* op3 = opStart3;
1278e0c1b49fSNick Terrell BYTE* op4 = opStart4;
1279e0c1b49fSNick Terrell U32 endSignal = 1;
1280e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable);
1281e0c1b49fSNick Terrell U32 const dtLog = dtd.tableLog;
1282e0c1b49fSNick Terrell
1283e0c1b49fSNick Terrell if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
12842aa14b1aSNick Terrell if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
1285e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
1286e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
1287e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
1288e0c1b49fSNick Terrell CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
1289e0c1b49fSNick Terrell
1290e0c1b49fSNick Terrell /* 16-32 symbols per loop (4-8 symbols per stream) */
12912aa14b1aSNick Terrell if ((size_t)(oend - op4) >= sizeof(size_t)) {
1292e0c1b49fSNick Terrell for ( ; (endSignal) & (op4 < olimit); ) {
1293e0c1b49fSNick Terrell #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
1294e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1295e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
1296e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1297e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
1298e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1299e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
1300e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1301e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
1302e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
1303e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
1304e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1305e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
1306e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1307e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
1308e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1309e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
1310e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1311e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
1312e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
1313e0c1b49fSNick Terrell endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
1314e0c1b49fSNick Terrell #else
1315e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1316e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1317e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1318e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1319e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
1320e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
1321e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
1322e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
1323e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1324e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1325e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1326e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1327e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
1328e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
1329e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
1330e0c1b49fSNick Terrell HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
13310a8ea235SNathan Chancellor endSignal = (U32)LIKELY((U32)
1332e0c1b49fSNick Terrell (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
1333e0c1b49fSNick Terrell & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
1334e0c1b49fSNick Terrell & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
1335e0c1b49fSNick Terrell & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
1336e0c1b49fSNick Terrell #endif
1337e0c1b49fSNick Terrell }
13382aa14b1aSNick Terrell }
1339e0c1b49fSNick Terrell
1340e0c1b49fSNick Terrell /* check corruption */
1341e0c1b49fSNick Terrell if (op1 > opStart2) return ERROR(corruption_detected);
1342e0c1b49fSNick Terrell if (op2 > opStart3) return ERROR(corruption_detected);
1343e0c1b49fSNick Terrell if (op3 > opStart4) return ERROR(corruption_detected);
1344e0c1b49fSNick Terrell /* note : op4 already verified within main loop */
1345e0c1b49fSNick Terrell
1346e0c1b49fSNick Terrell /* finish bitStreams one by one */
1347e0c1b49fSNick Terrell HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
1348e0c1b49fSNick Terrell HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
1349e0c1b49fSNick Terrell HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
1350e0c1b49fSNick Terrell HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
1351e0c1b49fSNick Terrell
1352e0c1b49fSNick Terrell /* check */
1353e0c1b49fSNick Terrell { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
1354e0c1b49fSNick Terrell if (!endCheck) return ERROR(corruption_detected); }
1355e0c1b49fSNick Terrell
1356e0c1b49fSNick Terrell /* decoded size */
1357e0c1b49fSNick Terrell return dstSize;
1358e0c1b49fSNick Terrell }
1359e0c1b49fSNick Terrell }
1360e0c1b49fSNick Terrell
13612aa14b1aSNick Terrell #if HUF_NEED_BMI2_FUNCTION
13622aa14b1aSNick Terrell static BMI2_TARGET_ATTRIBUTE
HUF_decompress4X2_usingDTable_internal_bmi2(void * dst,size_t dstSize,void const * cSrc,size_t cSrcSize,HUF_DTable const * DTable)13632aa14b1aSNick Terrell size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
13642aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable) {
13652aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
13662aa14b1aSNick Terrell }
13672aa14b1aSNick Terrell #endif
13682aa14b1aSNick Terrell
13692aa14b1aSNick Terrell #if HUF_NEED_DEFAULT_FUNCTION
13702aa14b1aSNick Terrell static
HUF_decompress4X2_usingDTable_internal_default(void * dst,size_t dstSize,void const * cSrc,size_t cSrcSize,HUF_DTable const * DTable)13712aa14b1aSNick Terrell size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
13722aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable) {
13732aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
13742aa14b1aSNick Terrell }
13752aa14b1aSNick Terrell #endif
13762aa14b1aSNick Terrell
13772aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2
13782aa14b1aSNick Terrell
13792aa14b1aSNick Terrell HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
13802aa14b1aSNick Terrell
13812aa14b1aSNick Terrell static HUF_ASM_X86_64_BMI2_ATTRS size_t
HUF_decompress4X2_usingDTable_internal_bmi2_asm(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)13822aa14b1aSNick Terrell HUF_decompress4X2_usingDTable_internal_bmi2_asm(
13832aa14b1aSNick Terrell void* dst, size_t dstSize,
13842aa14b1aSNick Terrell const void* cSrc, size_t cSrcSize,
13852aa14b1aSNick Terrell const HUF_DTable* DTable) {
13862aa14b1aSNick Terrell void const* dt = DTable + 1;
13872aa14b1aSNick Terrell const BYTE* const iend = (const BYTE*)cSrc + 6;
13882aa14b1aSNick Terrell BYTE* const oend = (BYTE*)dst + dstSize;
13892aa14b1aSNick Terrell HUF_DecompressAsmArgs args;
13902aa14b1aSNick Terrell {
13912aa14b1aSNick Terrell size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
13922aa14b1aSNick Terrell FORWARD_IF_ERROR(ret, "Failed to init asm args");
13932aa14b1aSNick Terrell if (ret != 0)
13942aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
13952aa14b1aSNick Terrell }
13962aa14b1aSNick Terrell
13972aa14b1aSNick Terrell assert(args.ip[0] >= args.ilimit);
13982aa14b1aSNick Terrell HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
13992aa14b1aSNick Terrell
14002aa14b1aSNick Terrell /* note : op4 already verified within main loop */
14012aa14b1aSNick Terrell assert(args.ip[0] >= iend);
14022aa14b1aSNick Terrell assert(args.ip[1] >= iend);
14032aa14b1aSNick Terrell assert(args.ip[2] >= iend);
14042aa14b1aSNick Terrell assert(args.ip[3] >= iend);
14052aa14b1aSNick Terrell assert(args.op[3] <= oend);
14062aa14b1aSNick Terrell (void)iend;
14072aa14b1aSNick Terrell
14082aa14b1aSNick Terrell /* finish bitStreams one by one */
14092aa14b1aSNick Terrell {
14102aa14b1aSNick Terrell size_t const segmentSize = (dstSize+3) / 4;
14112aa14b1aSNick Terrell BYTE* segmentEnd = (BYTE*)dst;
14122aa14b1aSNick Terrell int i;
14132aa14b1aSNick Terrell for (i = 0; i < 4; ++i) {
14142aa14b1aSNick Terrell BIT_DStream_t bit;
14152aa14b1aSNick Terrell if (segmentSize <= (size_t)(oend - segmentEnd))
14162aa14b1aSNick Terrell segmentEnd += segmentSize;
14172aa14b1aSNick Terrell else
14182aa14b1aSNick Terrell segmentEnd = oend;
14192aa14b1aSNick Terrell FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
14202aa14b1aSNick Terrell args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
14212aa14b1aSNick Terrell if (args.op[i] != segmentEnd)
14222aa14b1aSNick Terrell return ERROR(corruption_detected);
14232aa14b1aSNick Terrell }
14242aa14b1aSNick Terrell }
14252aa14b1aSNick Terrell
14262aa14b1aSNick Terrell /* decoded size */
14272aa14b1aSNick Terrell return dstSize;
14282aa14b1aSNick Terrell }
14292aa14b1aSNick Terrell #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
14302aa14b1aSNick Terrell
HUF_decompress4X2_usingDTable_internal(void * dst,size_t dstSize,void const * cSrc,size_t cSrcSize,HUF_DTable const * DTable,int bmi2)14312aa14b1aSNick Terrell static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
14322aa14b1aSNick Terrell size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
14332aa14b1aSNick Terrell {
14342aa14b1aSNick Terrell #if DYNAMIC_BMI2
14352aa14b1aSNick Terrell if (bmi2) {
14362aa14b1aSNick Terrell # if ZSTD_ENABLE_ASM_X86_64_BMI2
14372aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
14382aa14b1aSNick Terrell # else
14392aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
14402aa14b1aSNick Terrell # endif
14412aa14b1aSNick Terrell }
14422aa14b1aSNick Terrell #else
14432aa14b1aSNick Terrell (void)bmi2;
14442aa14b1aSNick Terrell #endif
14452aa14b1aSNick Terrell
14462aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
14472aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
14482aa14b1aSNick Terrell #else
14492aa14b1aSNick Terrell return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
14502aa14b1aSNick Terrell #endif
14512aa14b1aSNick Terrell }
14522aa14b1aSNick Terrell
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)1453e0c1b49fSNick Terrell HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
1454e0c1b49fSNick Terrell
1455e0c1b49fSNick Terrell size_t HUF_decompress1X2_usingDTable(
1456e0c1b49fSNick Terrell void* dst, size_t dstSize,
1457e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
1458e0c1b49fSNick Terrell const HUF_DTable* DTable)
1459e0c1b49fSNick Terrell {
1460e0c1b49fSNick Terrell DTableDesc dtd = HUF_getDTableDesc(DTable);
1461e0c1b49fSNick Terrell if (dtd.tableType != 1) return ERROR(GENERIC);
1462e0c1b49fSNick Terrell return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1463e0c1b49fSNick Terrell }
1464e0c1b49fSNick Terrell
HUF_decompress1X2_DCtx_wksp(HUF_DTable * DCtx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)1465e0c1b49fSNick Terrell size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
1466e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
1467e0c1b49fSNick Terrell void* workSpace, size_t wkspSize)
1468e0c1b49fSNick Terrell {
1469e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*) cSrc;
1470e0c1b49fSNick Terrell
1471e0c1b49fSNick Terrell size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
1472e0c1b49fSNick Terrell workSpace, wkspSize);
1473e0c1b49fSNick Terrell if (HUF_isError(hSize)) return hSize;
1474e0c1b49fSNick Terrell if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1475e0c1b49fSNick Terrell ip += hSize; cSrcSize -= hSize;
1476e0c1b49fSNick Terrell
1477e0c1b49fSNick Terrell return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
1478e0c1b49fSNick Terrell }
1479e0c1b49fSNick Terrell
1480e0c1b49fSNick Terrell
HUF_decompress4X2_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)1481e0c1b49fSNick Terrell size_t HUF_decompress4X2_usingDTable(
1482e0c1b49fSNick Terrell void* dst, size_t dstSize,
1483e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
1484e0c1b49fSNick Terrell const HUF_DTable* DTable)
1485e0c1b49fSNick Terrell {
1486e0c1b49fSNick Terrell DTableDesc dtd = HUF_getDTableDesc(DTable);
1487e0c1b49fSNick Terrell if (dtd.tableType != 1) return ERROR(GENERIC);
1488e0c1b49fSNick Terrell return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1489e0c1b49fSNick Terrell }
1490e0c1b49fSNick Terrell
HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)1491e0c1b49fSNick Terrell static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
1492e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
1493e0c1b49fSNick Terrell void* workSpace, size_t wkspSize, int bmi2)
1494e0c1b49fSNick Terrell {
1495e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*) cSrc;
1496e0c1b49fSNick Terrell
1497e0c1b49fSNick Terrell size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
1498e0c1b49fSNick Terrell workSpace, wkspSize);
1499e0c1b49fSNick Terrell if (HUF_isError(hSize)) return hSize;
1500e0c1b49fSNick Terrell if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1501e0c1b49fSNick Terrell ip += hSize; cSrcSize -= hSize;
1502e0c1b49fSNick Terrell
1503e0c1b49fSNick Terrell return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1504e0c1b49fSNick Terrell }
1505e0c1b49fSNick Terrell
HUF_decompress4X2_DCtx_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)1506e0c1b49fSNick Terrell size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1507e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
1508e0c1b49fSNick Terrell void* workSpace, size_t wkspSize)
1509e0c1b49fSNick Terrell {
1510e0c1b49fSNick Terrell return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
1511e0c1b49fSNick Terrell }
1512e0c1b49fSNick Terrell
1513e0c1b49fSNick Terrell
1514e0c1b49fSNick Terrell #endif /* HUF_FORCE_DECOMPRESS_X1 */
1515e0c1b49fSNick Terrell
1516e0c1b49fSNick Terrell
1517e0c1b49fSNick Terrell /* ***********************************/
1518e0c1b49fSNick Terrell /* Universal decompression selectors */
1519e0c1b49fSNick Terrell /* ***********************************/
1520e0c1b49fSNick Terrell
HUF_decompress1X_usingDTable(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)1521e0c1b49fSNick Terrell size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
1522e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
1523e0c1b49fSNick Terrell const HUF_DTable* DTable)
1524e0c1b49fSNick Terrell {
1525e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable);
1526e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1527e0c1b49fSNick Terrell (void)dtd;
1528e0c1b49fSNick Terrell assert(dtd.tableType == 0);
1529e0c1b49fSNick Terrell return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1530e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1531e0c1b49fSNick Terrell (void)dtd;
1532e0c1b49fSNick Terrell assert(dtd.tableType == 1);
1533e0c1b49fSNick Terrell return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1534e0c1b49fSNick Terrell #else
1535e0c1b49fSNick Terrell return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1536e0c1b49fSNick Terrell HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1537e0c1b49fSNick Terrell #endif
1538e0c1b49fSNick Terrell }
1539e0c1b49fSNick Terrell
HUF_decompress4X_usingDTable(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)1540e0c1b49fSNick Terrell size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
1541e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
1542e0c1b49fSNick Terrell const HUF_DTable* DTable)
1543e0c1b49fSNick Terrell {
1544e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable);
1545e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1546e0c1b49fSNick Terrell (void)dtd;
1547e0c1b49fSNick Terrell assert(dtd.tableType == 0);
1548e0c1b49fSNick Terrell return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1549e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1550e0c1b49fSNick Terrell (void)dtd;
1551e0c1b49fSNick Terrell assert(dtd.tableType == 1);
1552e0c1b49fSNick Terrell return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1553e0c1b49fSNick Terrell #else
1554e0c1b49fSNick Terrell return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1555e0c1b49fSNick Terrell HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1556e0c1b49fSNick Terrell #endif
1557e0c1b49fSNick Terrell }
1558e0c1b49fSNick Terrell
1559e0c1b49fSNick Terrell
1560e0c1b49fSNick Terrell #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1561e0c1b49fSNick Terrell typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
15622aa14b1aSNick Terrell static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
1563e0c1b49fSNick Terrell {
1564e0c1b49fSNick Terrell /* single, double, quad */
15652aa14b1aSNick Terrell {{0,0}, {1,1}}, /* Q==0 : impossible */
15662aa14b1aSNick Terrell {{0,0}, {1,1}}, /* Q==1 : impossible */
15672aa14b1aSNick Terrell {{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */
15682aa14b1aSNick Terrell {{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */
15692aa14b1aSNick Terrell {{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */
15702aa14b1aSNick Terrell {{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */
15712aa14b1aSNick Terrell {{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */
15722aa14b1aSNick Terrell {{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */
15732aa14b1aSNick Terrell {{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */
15742aa14b1aSNick Terrell {{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */
15752aa14b1aSNick Terrell {{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */
15762aa14b1aSNick Terrell {{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */
15772aa14b1aSNick Terrell {{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */
15782aa14b1aSNick Terrell {{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */
15792aa14b1aSNick Terrell {{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */
15802aa14b1aSNick Terrell {{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */
1581e0c1b49fSNick Terrell };
1582e0c1b49fSNick Terrell #endif
1583e0c1b49fSNick Terrell
1584e0c1b49fSNick Terrell /* HUF_selectDecoder() :
1585e0c1b49fSNick Terrell * Tells which decoder is likely to decode faster,
1586e0c1b49fSNick Terrell * based on a set of pre-computed metrics.
1587e0c1b49fSNick Terrell * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1588e0c1b49fSNick Terrell * Assumption : 0 < dstSize <= 128 KB */
HUF_selectDecoder(size_t dstSize,size_t cSrcSize)1589e0c1b49fSNick Terrell U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1590e0c1b49fSNick Terrell {
1591e0c1b49fSNick Terrell assert(dstSize > 0);
1592e0c1b49fSNick Terrell assert(dstSize <= 128*1024);
1593e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1594e0c1b49fSNick Terrell (void)dstSize;
1595e0c1b49fSNick Terrell (void)cSrcSize;
1596e0c1b49fSNick Terrell return 0;
1597e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1598e0c1b49fSNick Terrell (void)dstSize;
1599e0c1b49fSNick Terrell (void)cSrcSize;
1600e0c1b49fSNick Terrell return 1;
1601e0c1b49fSNick Terrell #else
1602e0c1b49fSNick Terrell /* decoder timing evaluation */
1603e0c1b49fSNick Terrell { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1604e0c1b49fSNick Terrell U32 const D256 = (U32)(dstSize >> 8);
1605e0c1b49fSNick Terrell U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1606e0c1b49fSNick Terrell U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
16072aa14b1aSNick Terrell DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */
1608e0c1b49fSNick Terrell return DTime1 < DTime0;
1609e0c1b49fSNick Terrell }
1610e0c1b49fSNick Terrell #endif
1611e0c1b49fSNick Terrell }
1612e0c1b49fSNick Terrell
1613e0c1b49fSNick Terrell
HUF_decompress4X_hufOnly_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)1614e0c1b49fSNick Terrell size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1615e0c1b49fSNick Terrell size_t dstSize, const void* cSrc,
1616e0c1b49fSNick Terrell size_t cSrcSize, void* workSpace,
1617e0c1b49fSNick Terrell size_t wkspSize)
1618e0c1b49fSNick Terrell {
1619e0c1b49fSNick Terrell /* validation checks */
1620e0c1b49fSNick Terrell if (dstSize == 0) return ERROR(dstSize_tooSmall);
1621e0c1b49fSNick Terrell if (cSrcSize == 0) return ERROR(corruption_detected);
1622e0c1b49fSNick Terrell
1623e0c1b49fSNick Terrell { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1624e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1625e0c1b49fSNick Terrell (void)algoNb;
1626e0c1b49fSNick Terrell assert(algoNb == 0);
1627e0c1b49fSNick Terrell return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1628e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1629e0c1b49fSNick Terrell (void)algoNb;
1630e0c1b49fSNick Terrell assert(algoNb == 1);
1631e0c1b49fSNick Terrell return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1632e0c1b49fSNick Terrell #else
1633e0c1b49fSNick Terrell return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1634e0c1b49fSNick Terrell cSrcSize, workSpace, wkspSize):
1635e0c1b49fSNick Terrell HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1636e0c1b49fSNick Terrell #endif
1637e0c1b49fSNick Terrell }
1638e0c1b49fSNick Terrell }
1639e0c1b49fSNick Terrell
HUF_decompress1X_DCtx_wksp(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize)1640e0c1b49fSNick Terrell size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1641e0c1b49fSNick Terrell const void* cSrc, size_t cSrcSize,
1642e0c1b49fSNick Terrell void* workSpace, size_t wkspSize)
1643e0c1b49fSNick Terrell {
1644e0c1b49fSNick Terrell /* validation checks */
1645e0c1b49fSNick Terrell if (dstSize == 0) return ERROR(dstSize_tooSmall);
1646e0c1b49fSNick Terrell if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1647e0c1b49fSNick Terrell if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1648e0c1b49fSNick Terrell if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1649e0c1b49fSNick Terrell
1650e0c1b49fSNick Terrell { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1651e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1652e0c1b49fSNick Terrell (void)algoNb;
1653e0c1b49fSNick Terrell assert(algoNb == 0);
1654e0c1b49fSNick Terrell return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1655e0c1b49fSNick Terrell cSrcSize, workSpace, wkspSize);
1656e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1657e0c1b49fSNick Terrell (void)algoNb;
1658e0c1b49fSNick Terrell assert(algoNb == 1);
1659e0c1b49fSNick Terrell return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1660e0c1b49fSNick Terrell cSrcSize, workSpace, wkspSize);
1661e0c1b49fSNick Terrell #else
1662e0c1b49fSNick Terrell return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1663e0c1b49fSNick Terrell cSrcSize, workSpace, wkspSize):
1664e0c1b49fSNick Terrell HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1665e0c1b49fSNick Terrell cSrcSize, workSpace, wkspSize);
1666e0c1b49fSNick Terrell #endif
1667e0c1b49fSNick Terrell }
1668e0c1b49fSNick Terrell }
1669e0c1b49fSNick Terrell
1670e0c1b49fSNick Terrell
HUF_decompress1X_usingDTable_bmi2(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable,int bmi2)1671e0c1b49fSNick Terrell size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1672e0c1b49fSNick Terrell {
1673e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable);
1674e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1675e0c1b49fSNick Terrell (void)dtd;
1676e0c1b49fSNick Terrell assert(dtd.tableType == 0);
1677e0c1b49fSNick Terrell return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1678e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1679e0c1b49fSNick Terrell (void)dtd;
1680e0c1b49fSNick Terrell assert(dtd.tableType == 1);
1681e0c1b49fSNick Terrell return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1682e0c1b49fSNick Terrell #else
1683e0c1b49fSNick Terrell return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1684e0c1b49fSNick Terrell HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1685e0c1b49fSNick Terrell #endif
1686e0c1b49fSNick Terrell }
1687e0c1b49fSNick Terrell
1688e0c1b49fSNick Terrell #ifndef HUF_FORCE_DECOMPRESS_X2
HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)1689e0c1b49fSNick Terrell size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1690e0c1b49fSNick Terrell {
1691e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*) cSrc;
1692e0c1b49fSNick Terrell
1693e0c1b49fSNick Terrell size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1694e0c1b49fSNick Terrell if (HUF_isError(hSize)) return hSize;
1695e0c1b49fSNick Terrell if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1696e0c1b49fSNick Terrell ip += hSize; cSrcSize -= hSize;
1697e0c1b49fSNick Terrell
1698e0c1b49fSNick Terrell return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1699e0c1b49fSNick Terrell }
1700e0c1b49fSNick Terrell #endif
1701e0c1b49fSNick Terrell
HUF_decompress4X_usingDTable_bmi2(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable,int bmi2)1702e0c1b49fSNick Terrell size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1703e0c1b49fSNick Terrell {
1704e0c1b49fSNick Terrell DTableDesc const dtd = HUF_getDTableDesc(DTable);
1705e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1706e0c1b49fSNick Terrell (void)dtd;
1707e0c1b49fSNick Terrell assert(dtd.tableType == 0);
1708e0c1b49fSNick Terrell return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1709e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1710e0c1b49fSNick Terrell (void)dtd;
1711e0c1b49fSNick Terrell assert(dtd.tableType == 1);
1712e0c1b49fSNick Terrell return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1713e0c1b49fSNick Terrell #else
1714e0c1b49fSNick Terrell return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1715e0c1b49fSNick Terrell HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1716e0c1b49fSNick Terrell #endif
1717e0c1b49fSNick Terrell }
1718e0c1b49fSNick Terrell
HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)1719e0c1b49fSNick Terrell size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1720e0c1b49fSNick Terrell {
1721e0c1b49fSNick Terrell /* validation checks */
1722e0c1b49fSNick Terrell if (dstSize == 0) return ERROR(dstSize_tooSmall);
1723e0c1b49fSNick Terrell if (cSrcSize == 0) return ERROR(corruption_detected);
1724e0c1b49fSNick Terrell
1725e0c1b49fSNick Terrell { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1726e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1727e0c1b49fSNick Terrell (void)algoNb;
1728e0c1b49fSNick Terrell assert(algoNb == 0);
1729e0c1b49fSNick Terrell return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1730e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1731e0c1b49fSNick Terrell (void)algoNb;
1732e0c1b49fSNick Terrell assert(algoNb == 1);
1733e0c1b49fSNick Terrell return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1734e0c1b49fSNick Terrell #else
1735e0c1b49fSNick Terrell return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1736e0c1b49fSNick Terrell HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1737e0c1b49fSNick Terrell #endif
1738e0c1b49fSNick Terrell }
1739e0c1b49fSNick Terrell }
1740e0c1b49fSNick Terrell
1741