xref: /openbmc/linux/lib/zstd/decompress/huf_decompress.c (revision 2aa14b1ab2c41a4fe41efae80d58bb77da91f19f)
1e0c1b49fSNick Terrell /* ******************************************************************
2e0c1b49fSNick Terrell  * huff0 huffman decoder,
3e0c1b49fSNick Terrell  * part of Finite State Entropy library
4e0c1b49fSNick Terrell  * Copyright (c) Yann Collet, Facebook, Inc.
5e0c1b49fSNick Terrell  *
6e0c1b49fSNick Terrell  *  You can contact the author at :
7e0c1b49fSNick Terrell  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
8e0c1b49fSNick Terrell  *
9e0c1b49fSNick Terrell  * This source code is licensed under both the BSD-style license (found in the
10e0c1b49fSNick Terrell  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11e0c1b49fSNick Terrell  * in the COPYING file in the root directory of this source tree).
12e0c1b49fSNick Terrell  * You may select, at your option, one of the above-listed licenses.
13e0c1b49fSNick Terrell ****************************************************************** */
14e0c1b49fSNick Terrell 
15e0c1b49fSNick Terrell /* **************************************************************
16e0c1b49fSNick Terrell *  Dependencies
17e0c1b49fSNick Terrell ****************************************************************/
18e0c1b49fSNick Terrell #include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */
19e0c1b49fSNick Terrell #include "../common/compiler.h"
20e0c1b49fSNick Terrell #include "../common/bitstream.h"  /* BIT_* */
21e0c1b49fSNick Terrell #include "../common/fse.h"        /* to compress headers */
22e0c1b49fSNick Terrell #define HUF_STATIC_LINKING_ONLY
23e0c1b49fSNick Terrell #include "../common/huf.h"
24e0c1b49fSNick Terrell #include "../common/error_private.h"
25*2aa14b1aSNick Terrell #include "../common/zstd_internal.h"
26*2aa14b1aSNick Terrell 
27*2aa14b1aSNick Terrell /* **************************************************************
28*2aa14b1aSNick Terrell *  Constants
29*2aa14b1aSNick Terrell ****************************************************************/
30*2aa14b1aSNick Terrell 
31*2aa14b1aSNick Terrell #define HUF_DECODER_FAST_TABLELOG 11
32e0c1b49fSNick Terrell 
33e0c1b49fSNick Terrell /* **************************************************************
34e0c1b49fSNick Terrell *  Macros
35e0c1b49fSNick Terrell ****************************************************************/
36e0c1b49fSNick Terrell 
37e0c1b49fSNick Terrell /* These two optional macros force the use one way or another of the two
38e0c1b49fSNick Terrell  * Huffman decompression implementations. You can't force in both directions
39e0c1b49fSNick Terrell  * at the same time.
40e0c1b49fSNick Terrell  */
41e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1) && \
42e0c1b49fSNick Terrell     defined(HUF_FORCE_DECOMPRESS_X2)
43e0c1b49fSNick Terrell #error "Cannot force the use of the X1 and X2 decoders at the same time!"
44e0c1b49fSNick Terrell #endif
45e0c1b49fSNick Terrell 
46*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
47*2aa14b1aSNick Terrell # define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
48*2aa14b1aSNick Terrell #else
49*2aa14b1aSNick Terrell # define HUF_ASM_X86_64_BMI2_ATTRS
50*2aa14b1aSNick Terrell #endif
51*2aa14b1aSNick Terrell 
52*2aa14b1aSNick Terrell #define HUF_EXTERN_C
53*2aa14b1aSNick Terrell #define HUF_ASM_DECL HUF_EXTERN_C
54*2aa14b1aSNick Terrell 
55*2aa14b1aSNick Terrell #if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
56*2aa14b1aSNick Terrell # define HUF_NEED_BMI2_FUNCTION 1
57*2aa14b1aSNick Terrell #else
58*2aa14b1aSNick Terrell # define HUF_NEED_BMI2_FUNCTION 0
59*2aa14b1aSNick Terrell #endif
60*2aa14b1aSNick Terrell 
61*2aa14b1aSNick Terrell #if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
62*2aa14b1aSNick Terrell # define HUF_NEED_DEFAULT_FUNCTION 1
63*2aa14b1aSNick Terrell #else
64*2aa14b1aSNick Terrell # define HUF_NEED_DEFAULT_FUNCTION 0
65*2aa14b1aSNick Terrell #endif
66e0c1b49fSNick Terrell 
67e0c1b49fSNick Terrell /* **************************************************************
68e0c1b49fSNick Terrell *  Error Management
69e0c1b49fSNick Terrell ****************************************************************/
70e0c1b49fSNick Terrell #define HUF_isError ERR_isError
71e0c1b49fSNick Terrell 
72e0c1b49fSNick Terrell 
73e0c1b49fSNick Terrell /* **************************************************************
74e0c1b49fSNick Terrell *  Byte alignment for workSpace management
75e0c1b49fSNick Terrell ****************************************************************/
76e0c1b49fSNick Terrell #define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
77e0c1b49fSNick Terrell #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
78e0c1b49fSNick Terrell 
79e0c1b49fSNick Terrell 
80e0c1b49fSNick Terrell /* **************************************************************
81e0c1b49fSNick Terrell *  BMI2 Variant Wrappers
82e0c1b49fSNick Terrell ****************************************************************/
83e0c1b49fSNick Terrell #if DYNAMIC_BMI2
84e0c1b49fSNick Terrell 
85e0c1b49fSNick Terrell #define HUF_DGEN(fn)                                                        \
86e0c1b49fSNick Terrell                                                                             \
87e0c1b49fSNick Terrell     static size_t fn##_default(                                             \
88e0c1b49fSNick Terrell                   void* dst,  size_t dstSize,                               \
89e0c1b49fSNick Terrell             const void* cSrc, size_t cSrcSize,                              \
90e0c1b49fSNick Terrell             const HUF_DTable* DTable)                                       \
91e0c1b49fSNick Terrell     {                                                                       \
92e0c1b49fSNick Terrell         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
93e0c1b49fSNick Terrell     }                                                                       \
94e0c1b49fSNick Terrell                                                                             \
95*2aa14b1aSNick Terrell     static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2(                          \
96e0c1b49fSNick Terrell                   void* dst,  size_t dstSize,                               \
97e0c1b49fSNick Terrell             const void* cSrc, size_t cSrcSize,                              \
98e0c1b49fSNick Terrell             const HUF_DTable* DTable)                                       \
99e0c1b49fSNick Terrell     {                                                                       \
100e0c1b49fSNick Terrell         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
101e0c1b49fSNick Terrell     }                                                                       \
102e0c1b49fSNick Terrell                                                                             \
103e0c1b49fSNick Terrell     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
104e0c1b49fSNick Terrell                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
105e0c1b49fSNick Terrell     {                                                                       \
106e0c1b49fSNick Terrell         if (bmi2) {                                                         \
107e0c1b49fSNick Terrell             return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
108e0c1b49fSNick Terrell         }                                                                   \
109e0c1b49fSNick Terrell         return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
110e0c1b49fSNick Terrell     }
111e0c1b49fSNick Terrell 
112e0c1b49fSNick Terrell #else
113e0c1b49fSNick Terrell 
114e0c1b49fSNick Terrell #define HUF_DGEN(fn)                                                        \
115e0c1b49fSNick Terrell     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
116e0c1b49fSNick Terrell                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
117e0c1b49fSNick Terrell     {                                                                       \
118e0c1b49fSNick Terrell         (void)bmi2;                                                         \
119e0c1b49fSNick Terrell         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
120e0c1b49fSNick Terrell     }
121e0c1b49fSNick Terrell 
122e0c1b49fSNick Terrell #endif
123e0c1b49fSNick Terrell 
124e0c1b49fSNick Terrell 
125e0c1b49fSNick Terrell /*-***************************/
126e0c1b49fSNick Terrell /*  generic DTableDesc       */
127e0c1b49fSNick Terrell /*-***************************/
128e0c1b49fSNick Terrell typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
129e0c1b49fSNick Terrell 
130e0c1b49fSNick Terrell static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
131e0c1b49fSNick Terrell {
132e0c1b49fSNick Terrell     DTableDesc dtd;
133e0c1b49fSNick Terrell     ZSTD_memcpy(&dtd, table, sizeof(dtd));
134e0c1b49fSNick Terrell     return dtd;
135e0c1b49fSNick Terrell }
136e0c1b49fSNick Terrell 
137*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2
138*2aa14b1aSNick Terrell 
139*2aa14b1aSNick Terrell static size_t HUF_initDStream(BYTE const* ip) {
140*2aa14b1aSNick Terrell     BYTE const lastByte = ip[7];
141*2aa14b1aSNick Terrell     size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
142*2aa14b1aSNick Terrell     size_t const value = MEM_readLEST(ip) | 1;
143*2aa14b1aSNick Terrell     assert(bitsConsumed <= 8);
144*2aa14b1aSNick Terrell     return value << bitsConsumed;
145*2aa14b1aSNick Terrell }
146*2aa14b1aSNick Terrell typedef struct {
147*2aa14b1aSNick Terrell     BYTE const* ip[4];
148*2aa14b1aSNick Terrell     BYTE* op[4];
149*2aa14b1aSNick Terrell     U64 bits[4];
150*2aa14b1aSNick Terrell     void const* dt;
151*2aa14b1aSNick Terrell     BYTE const* ilimit;
152*2aa14b1aSNick Terrell     BYTE* oend;
153*2aa14b1aSNick Terrell     BYTE const* iend[4];
154*2aa14b1aSNick Terrell } HUF_DecompressAsmArgs;
155*2aa14b1aSNick Terrell 
156*2aa14b1aSNick Terrell /*
157*2aa14b1aSNick Terrell  * Initializes args for the asm decoding loop.
158*2aa14b1aSNick Terrell  * @returns 0 on success
159*2aa14b1aSNick Terrell  *          1 if the fallback implementation should be used.
160*2aa14b1aSNick Terrell  *          Or an error code on failure.
161*2aa14b1aSNick Terrell  */
162*2aa14b1aSNick Terrell static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
163*2aa14b1aSNick Terrell {
164*2aa14b1aSNick Terrell     void const* dt = DTable + 1;
165*2aa14b1aSNick Terrell     U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
166*2aa14b1aSNick Terrell 
167*2aa14b1aSNick Terrell     const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
168*2aa14b1aSNick Terrell 
169*2aa14b1aSNick Terrell     BYTE* const oend = (BYTE*)dst + dstSize;
170*2aa14b1aSNick Terrell 
171*2aa14b1aSNick Terrell     /* The following condition is false on x32 platform,
172*2aa14b1aSNick Terrell      * but HUF_asm is not compatible with this ABI */
173*2aa14b1aSNick Terrell     if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
174*2aa14b1aSNick Terrell 
175*2aa14b1aSNick Terrell     /* strict minimum : jump table + 1 byte per stream */
176*2aa14b1aSNick Terrell     if (srcSize < 10)
177*2aa14b1aSNick Terrell         return ERROR(corruption_detected);
178*2aa14b1aSNick Terrell 
179*2aa14b1aSNick Terrell     /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
180*2aa14b1aSNick Terrell      * If table log is not correct at this point, fallback to the old decoder.
181*2aa14b1aSNick Terrell      * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
182*2aa14b1aSNick Terrell      */
183*2aa14b1aSNick Terrell     if (dtLog != HUF_DECODER_FAST_TABLELOG)
184*2aa14b1aSNick Terrell         return 1;
185*2aa14b1aSNick Terrell 
186*2aa14b1aSNick Terrell     /* Read the jump table. */
187*2aa14b1aSNick Terrell     {
188*2aa14b1aSNick Terrell         const BYTE* const istart = (const BYTE*)src;
189*2aa14b1aSNick Terrell         size_t const length1 = MEM_readLE16(istart);
190*2aa14b1aSNick Terrell         size_t const length2 = MEM_readLE16(istart+2);
191*2aa14b1aSNick Terrell         size_t const length3 = MEM_readLE16(istart+4);
192*2aa14b1aSNick Terrell         size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
193*2aa14b1aSNick Terrell         args->iend[0] = istart + 6;  /* jumpTable */
194*2aa14b1aSNick Terrell         args->iend[1] = args->iend[0] + length1;
195*2aa14b1aSNick Terrell         args->iend[2] = args->iend[1] + length2;
196*2aa14b1aSNick Terrell         args->iend[3] = args->iend[2] + length3;
197*2aa14b1aSNick Terrell 
198*2aa14b1aSNick Terrell         /* HUF_initDStream() requires this, and this small of an input
199*2aa14b1aSNick Terrell          * won't benefit from the ASM loop anyways.
200*2aa14b1aSNick Terrell          * length1 must be >= 16 so that ip[0] >= ilimit before the loop
201*2aa14b1aSNick Terrell          * starts.
202*2aa14b1aSNick Terrell          */
203*2aa14b1aSNick Terrell         if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
204*2aa14b1aSNick Terrell             return 1;
205*2aa14b1aSNick Terrell         if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow */
206*2aa14b1aSNick Terrell     }
207*2aa14b1aSNick Terrell     /* ip[] contains the position that is currently loaded into bits[]. */
208*2aa14b1aSNick Terrell     args->ip[0] = args->iend[1] - sizeof(U64);
209*2aa14b1aSNick Terrell     args->ip[1] = args->iend[2] - sizeof(U64);
210*2aa14b1aSNick Terrell     args->ip[2] = args->iend[3] - sizeof(U64);
211*2aa14b1aSNick Terrell     args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
212*2aa14b1aSNick Terrell 
213*2aa14b1aSNick Terrell     /* op[] contains the output pointers. */
214*2aa14b1aSNick Terrell     args->op[0] = (BYTE*)dst;
215*2aa14b1aSNick Terrell     args->op[1] = args->op[0] + (dstSize+3)/4;
216*2aa14b1aSNick Terrell     args->op[2] = args->op[1] + (dstSize+3)/4;
217*2aa14b1aSNick Terrell     args->op[3] = args->op[2] + (dstSize+3)/4;
218*2aa14b1aSNick Terrell 
219*2aa14b1aSNick Terrell     /* No point to call the ASM loop for tiny outputs. */
220*2aa14b1aSNick Terrell     if (args->op[3] >= oend)
221*2aa14b1aSNick Terrell         return 1;
222*2aa14b1aSNick Terrell 
223*2aa14b1aSNick Terrell     /* bits[] is the bit container.
224*2aa14b1aSNick Terrell         * It is read from the MSB down to the LSB.
225*2aa14b1aSNick Terrell         * It is shifted left as it is read, and zeros are
226*2aa14b1aSNick Terrell         * shifted in. After the lowest valid bit a 1 is
227*2aa14b1aSNick Terrell         * set, so that CountTrailingZeros(bits[]) can be used
228*2aa14b1aSNick Terrell         * to count how many bits we've consumed.
229*2aa14b1aSNick Terrell         */
230*2aa14b1aSNick Terrell     args->bits[0] = HUF_initDStream(args->ip[0]);
231*2aa14b1aSNick Terrell     args->bits[1] = HUF_initDStream(args->ip[1]);
232*2aa14b1aSNick Terrell     args->bits[2] = HUF_initDStream(args->ip[2]);
233*2aa14b1aSNick Terrell     args->bits[3] = HUF_initDStream(args->ip[3]);
234*2aa14b1aSNick Terrell 
235*2aa14b1aSNick Terrell     /* If ip[] >= ilimit, it is guaranteed to be safe to
236*2aa14b1aSNick Terrell         * reload bits[]. It may be beyond its section, but is
237*2aa14b1aSNick Terrell         * guaranteed to be valid (>= istart).
238*2aa14b1aSNick Terrell         */
239*2aa14b1aSNick Terrell     args->ilimit = ilimit;
240*2aa14b1aSNick Terrell 
241*2aa14b1aSNick Terrell     args->oend = oend;
242*2aa14b1aSNick Terrell     args->dt = dt;
243*2aa14b1aSNick Terrell 
244*2aa14b1aSNick Terrell     return 0;
245*2aa14b1aSNick Terrell }
246*2aa14b1aSNick Terrell 
247*2aa14b1aSNick Terrell static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
248*2aa14b1aSNick Terrell {
249*2aa14b1aSNick Terrell     /* Validate that we haven't overwritten. */
250*2aa14b1aSNick Terrell     if (args->op[stream] > segmentEnd)
251*2aa14b1aSNick Terrell         return ERROR(corruption_detected);
252*2aa14b1aSNick Terrell     /* Validate that we haven't read beyond iend[].
253*2aa14b1aSNick Terrell         * Note that ip[] may be < iend[] because the MSB is
254*2aa14b1aSNick Terrell         * the next bit to read, and we may have consumed 100%
255*2aa14b1aSNick Terrell         * of the stream, so down to iend[i] - 8 is valid.
256*2aa14b1aSNick Terrell         */
257*2aa14b1aSNick Terrell     if (args->ip[stream] < args->iend[stream] - 8)
258*2aa14b1aSNick Terrell         return ERROR(corruption_detected);
259*2aa14b1aSNick Terrell 
260*2aa14b1aSNick Terrell     /* Construct the BIT_DStream_t. */
261*2aa14b1aSNick Terrell     bit->bitContainer = MEM_readLE64(args->ip[stream]);
262*2aa14b1aSNick Terrell     bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
263*2aa14b1aSNick Terrell     bit->start = (const char*)args->iend[0];
264*2aa14b1aSNick Terrell     bit->limitPtr = bit->start + sizeof(size_t);
265*2aa14b1aSNick Terrell     bit->ptr = (const char*)args->ip[stream];
266*2aa14b1aSNick Terrell 
267*2aa14b1aSNick Terrell     return 0;
268*2aa14b1aSNick Terrell }
269*2aa14b1aSNick Terrell #endif
270*2aa14b1aSNick Terrell 
271e0c1b49fSNick Terrell 
272e0c1b49fSNick Terrell #ifndef HUF_FORCE_DECOMPRESS_X2
273e0c1b49fSNick Terrell 
274e0c1b49fSNick Terrell /*-***************************/
275e0c1b49fSNick Terrell /*  single-symbol decoding   */
276e0c1b49fSNick Terrell /*-***************************/
277*2aa14b1aSNick Terrell typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1;   /* single-symbol decoding */
278e0c1b49fSNick Terrell 
279e0c1b49fSNick Terrell /*
280e0c1b49fSNick Terrell  * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
281e0c1b49fSNick Terrell  * a time.
282e0c1b49fSNick Terrell  */
283e0c1b49fSNick Terrell static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
284e0c1b49fSNick Terrell     U64 D4;
285e0c1b49fSNick Terrell     if (MEM_isLittleEndian()) {
286e0c1b49fSNick Terrell         D4 = (symbol << 8) + nbBits;
287*2aa14b1aSNick Terrell     } else {
288*2aa14b1aSNick Terrell         D4 = symbol + (nbBits << 8);
289e0c1b49fSNick Terrell     }
290e0c1b49fSNick Terrell     D4 *= 0x0001000100010001ULL;
291e0c1b49fSNick Terrell     return D4;
292e0c1b49fSNick Terrell }
293e0c1b49fSNick Terrell 
294*2aa14b1aSNick Terrell /*
295*2aa14b1aSNick Terrell  * Increase the tableLog to targetTableLog and rescales the stats.
296*2aa14b1aSNick Terrell  * If tableLog > targetTableLog this is a no-op.
297*2aa14b1aSNick Terrell  * @returns New tableLog
298*2aa14b1aSNick Terrell  */
299*2aa14b1aSNick Terrell static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
300*2aa14b1aSNick Terrell {
301*2aa14b1aSNick Terrell     if (tableLog > targetTableLog)
302*2aa14b1aSNick Terrell         return tableLog;
303*2aa14b1aSNick Terrell     if (tableLog < targetTableLog) {
304*2aa14b1aSNick Terrell         U32 const scale = targetTableLog - tableLog;
305*2aa14b1aSNick Terrell         U32 s;
306*2aa14b1aSNick Terrell         /* Increase the weight for all non-zero probability symbols by scale. */
307*2aa14b1aSNick Terrell         for (s = 0; s < nbSymbols; ++s) {
308*2aa14b1aSNick Terrell             huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
309*2aa14b1aSNick Terrell         }
310*2aa14b1aSNick Terrell         /* Update rankVal to reflect the new weights.
311*2aa14b1aSNick Terrell          * All weights except 0 get moved to weight + scale.
312*2aa14b1aSNick Terrell          * Weights [1, scale] are empty.
313*2aa14b1aSNick Terrell          */
314*2aa14b1aSNick Terrell         for (s = targetTableLog; s > scale; --s) {
315*2aa14b1aSNick Terrell             rankVal[s] = rankVal[s - scale];
316*2aa14b1aSNick Terrell         }
317*2aa14b1aSNick Terrell         for (s = scale; s > 0; --s) {
318*2aa14b1aSNick Terrell             rankVal[s] = 0;
319*2aa14b1aSNick Terrell         }
320*2aa14b1aSNick Terrell     }
321*2aa14b1aSNick Terrell     return targetTableLog;
322*2aa14b1aSNick Terrell }
323*2aa14b1aSNick Terrell 
324e0c1b49fSNick Terrell typedef struct {
325e0c1b49fSNick Terrell         U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
326e0c1b49fSNick Terrell         U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
327e0c1b49fSNick Terrell         U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
328e0c1b49fSNick Terrell         BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
329e0c1b49fSNick Terrell         BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
330e0c1b49fSNick Terrell } HUF_ReadDTableX1_Workspace;
331e0c1b49fSNick Terrell 
332e0c1b49fSNick Terrell 
333e0c1b49fSNick Terrell size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
334e0c1b49fSNick Terrell {
335e0c1b49fSNick Terrell     return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
336e0c1b49fSNick Terrell }
337e0c1b49fSNick Terrell 
338e0c1b49fSNick Terrell size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
339e0c1b49fSNick Terrell {
340e0c1b49fSNick Terrell     U32 tableLog = 0;
341e0c1b49fSNick Terrell     U32 nbSymbols = 0;
342e0c1b49fSNick Terrell     size_t iSize;
343e0c1b49fSNick Terrell     void* const dtPtr = DTable + 1;
344e0c1b49fSNick Terrell     HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
345e0c1b49fSNick Terrell     HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
346e0c1b49fSNick Terrell 
347e0c1b49fSNick Terrell     DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
348e0c1b49fSNick Terrell     if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
349e0c1b49fSNick Terrell 
350e0c1b49fSNick Terrell     DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
351e0c1b49fSNick Terrell     /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
352e0c1b49fSNick Terrell 
353e0c1b49fSNick Terrell     iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
354e0c1b49fSNick Terrell     if (HUF_isError(iSize)) return iSize;
355e0c1b49fSNick Terrell 
356*2aa14b1aSNick Terrell 
357e0c1b49fSNick Terrell     /* Table header */
358e0c1b49fSNick Terrell     {   DTableDesc dtd = HUF_getDTableDesc(DTable);
359*2aa14b1aSNick Terrell         U32 const maxTableLog = dtd.maxTableLog + 1;
360*2aa14b1aSNick Terrell         U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
361*2aa14b1aSNick Terrell         tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
362e0c1b49fSNick Terrell         if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
363e0c1b49fSNick Terrell         dtd.tableType = 0;
364e0c1b49fSNick Terrell         dtd.tableLog = (BYTE)tableLog;
365e0c1b49fSNick Terrell         ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
366e0c1b49fSNick Terrell     }
367e0c1b49fSNick Terrell 
368e0c1b49fSNick Terrell     /* Compute symbols and rankStart given rankVal:
369e0c1b49fSNick Terrell      *
370e0c1b49fSNick Terrell      * rankVal already contains the number of values of each weight.
371e0c1b49fSNick Terrell      *
372e0c1b49fSNick Terrell      * symbols contains the symbols ordered by weight. First are the rankVal[0]
373e0c1b49fSNick Terrell      * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
374e0c1b49fSNick Terrell      * symbols[0] is filled (but unused) to avoid a branch.
375e0c1b49fSNick Terrell      *
376e0c1b49fSNick Terrell      * rankStart contains the offset where each rank belongs in the DTable.
377e0c1b49fSNick Terrell      * rankStart[0] is not filled because there are no entries in the table for
378e0c1b49fSNick Terrell      * weight 0.
379e0c1b49fSNick Terrell      */
380e0c1b49fSNick Terrell     {
381e0c1b49fSNick Terrell         int n;
382e0c1b49fSNick Terrell         int nextRankStart = 0;
383e0c1b49fSNick Terrell         int const unroll = 4;
384e0c1b49fSNick Terrell         int const nLimit = (int)nbSymbols - unroll + 1;
385e0c1b49fSNick Terrell         for (n=0; n<(int)tableLog+1; n++) {
386e0c1b49fSNick Terrell             U32 const curr = nextRankStart;
387e0c1b49fSNick Terrell             nextRankStart += wksp->rankVal[n];
388e0c1b49fSNick Terrell             wksp->rankStart[n] = curr;
389e0c1b49fSNick Terrell         }
390e0c1b49fSNick Terrell         for (n=0; n < nLimit; n += unroll) {
391e0c1b49fSNick Terrell             int u;
392e0c1b49fSNick Terrell             for (u=0; u < unroll; ++u) {
393e0c1b49fSNick Terrell                 size_t const w = wksp->huffWeight[n+u];
394e0c1b49fSNick Terrell                 wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
395e0c1b49fSNick Terrell             }
396e0c1b49fSNick Terrell         }
397e0c1b49fSNick Terrell         for (; n < (int)nbSymbols; ++n) {
398e0c1b49fSNick Terrell             size_t const w = wksp->huffWeight[n];
399e0c1b49fSNick Terrell             wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
400e0c1b49fSNick Terrell         }
401e0c1b49fSNick Terrell     }
402e0c1b49fSNick Terrell 
403e0c1b49fSNick Terrell     /* fill DTable
404e0c1b49fSNick Terrell      * We fill all entries of each weight in order.
405*2aa14b1aSNick Terrell      * That way length is a constant for each iteration of the outer loop.
406e0c1b49fSNick Terrell      * We can switch based on the length to a different inner loop which is
407e0c1b49fSNick Terrell      * optimized for that particular case.
408e0c1b49fSNick Terrell      */
409e0c1b49fSNick Terrell     {
410e0c1b49fSNick Terrell         U32 w;
411e0c1b49fSNick Terrell         int symbol=wksp->rankVal[0];
412e0c1b49fSNick Terrell         int rankStart=0;
413e0c1b49fSNick Terrell         for (w=1; w<tableLog+1; ++w) {
414e0c1b49fSNick Terrell             int const symbolCount = wksp->rankVal[w];
415e0c1b49fSNick Terrell             int const length = (1 << w) >> 1;
416e0c1b49fSNick Terrell             int uStart = rankStart;
417e0c1b49fSNick Terrell             BYTE const nbBits = (BYTE)(tableLog + 1 - w);
418e0c1b49fSNick Terrell             int s;
419e0c1b49fSNick Terrell             int u;
420e0c1b49fSNick Terrell             switch (length) {
421e0c1b49fSNick Terrell             case 1:
422e0c1b49fSNick Terrell                 for (s=0; s<symbolCount; ++s) {
423e0c1b49fSNick Terrell                     HUF_DEltX1 D;
424e0c1b49fSNick Terrell                     D.byte = wksp->symbols[symbol + s];
425e0c1b49fSNick Terrell                     D.nbBits = nbBits;
426e0c1b49fSNick Terrell                     dt[uStart] = D;
427e0c1b49fSNick Terrell                     uStart += 1;
428e0c1b49fSNick Terrell                 }
429e0c1b49fSNick Terrell                 break;
430e0c1b49fSNick Terrell             case 2:
431e0c1b49fSNick Terrell                 for (s=0; s<symbolCount; ++s) {
432e0c1b49fSNick Terrell                     HUF_DEltX1 D;
433e0c1b49fSNick Terrell                     D.byte = wksp->symbols[symbol + s];
434e0c1b49fSNick Terrell                     D.nbBits = nbBits;
435e0c1b49fSNick Terrell                     dt[uStart+0] = D;
436e0c1b49fSNick Terrell                     dt[uStart+1] = D;
437e0c1b49fSNick Terrell                     uStart += 2;
438e0c1b49fSNick Terrell                 }
439e0c1b49fSNick Terrell                 break;
440e0c1b49fSNick Terrell             case 4:
441e0c1b49fSNick Terrell                 for (s=0; s<symbolCount; ++s) {
442e0c1b49fSNick Terrell                     U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
443e0c1b49fSNick Terrell                     MEM_write64(dt + uStart, D4);
444e0c1b49fSNick Terrell                     uStart += 4;
445e0c1b49fSNick Terrell                 }
446e0c1b49fSNick Terrell                 break;
447e0c1b49fSNick Terrell             case 8:
448e0c1b49fSNick Terrell                 for (s=0; s<symbolCount; ++s) {
449e0c1b49fSNick Terrell                     U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
450e0c1b49fSNick Terrell                     MEM_write64(dt + uStart, D4);
451e0c1b49fSNick Terrell                     MEM_write64(dt + uStart + 4, D4);
452e0c1b49fSNick Terrell                     uStart += 8;
453e0c1b49fSNick Terrell                 }
454e0c1b49fSNick Terrell                 break;
455e0c1b49fSNick Terrell             default:
456e0c1b49fSNick Terrell                 for (s=0; s<symbolCount; ++s) {
457e0c1b49fSNick Terrell                     U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
458e0c1b49fSNick Terrell                     for (u=0; u < length; u += 16) {
459e0c1b49fSNick Terrell                         MEM_write64(dt + uStart + u + 0, D4);
460e0c1b49fSNick Terrell                         MEM_write64(dt + uStart + u + 4, D4);
461e0c1b49fSNick Terrell                         MEM_write64(dt + uStart + u + 8, D4);
462e0c1b49fSNick Terrell                         MEM_write64(dt + uStart + u + 12, D4);
463e0c1b49fSNick Terrell                     }
464e0c1b49fSNick Terrell                     assert(u == length);
465e0c1b49fSNick Terrell                     uStart += length;
466e0c1b49fSNick Terrell                 }
467e0c1b49fSNick Terrell                 break;
468e0c1b49fSNick Terrell             }
469e0c1b49fSNick Terrell             symbol += symbolCount;
470e0c1b49fSNick Terrell             rankStart += symbolCount * length;
471e0c1b49fSNick Terrell         }
472e0c1b49fSNick Terrell     }
473e0c1b49fSNick Terrell     return iSize;
474e0c1b49fSNick Terrell }
475e0c1b49fSNick Terrell 
476e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE BYTE
477e0c1b49fSNick Terrell HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
478e0c1b49fSNick Terrell {
479e0c1b49fSNick Terrell     size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
480e0c1b49fSNick Terrell     BYTE const c = dt[val].byte;
481e0c1b49fSNick Terrell     BIT_skipBits(Dstream, dt[val].nbBits);
482e0c1b49fSNick Terrell     return c;
483e0c1b49fSNick Terrell }
484e0c1b49fSNick Terrell 
485e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
486e0c1b49fSNick Terrell     *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
487e0c1b49fSNick Terrell 
488e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
489e0c1b49fSNick Terrell     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
490e0c1b49fSNick Terrell         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
491e0c1b49fSNick Terrell 
492e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
493e0c1b49fSNick Terrell     if (MEM_64bits()) \
494e0c1b49fSNick Terrell         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
495e0c1b49fSNick Terrell 
496e0c1b49fSNick Terrell HINT_INLINE size_t
497e0c1b49fSNick Terrell HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
498e0c1b49fSNick Terrell {
499e0c1b49fSNick Terrell     BYTE* const pStart = p;
500e0c1b49fSNick Terrell 
501e0c1b49fSNick Terrell     /* up to 4 symbols at a time */
502*2aa14b1aSNick Terrell     if ((pEnd - p) > 3) {
503e0c1b49fSNick Terrell         while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
504e0c1b49fSNick Terrell             HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
505e0c1b49fSNick Terrell             HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
506e0c1b49fSNick Terrell             HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
507e0c1b49fSNick Terrell             HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
508e0c1b49fSNick Terrell         }
509*2aa14b1aSNick Terrell     } else {
510*2aa14b1aSNick Terrell         BIT_reloadDStream(bitDPtr);
511*2aa14b1aSNick Terrell     }
512e0c1b49fSNick Terrell 
513e0c1b49fSNick Terrell     /* [0-3] symbols remaining */
514e0c1b49fSNick Terrell     if (MEM_32bits())
515e0c1b49fSNick Terrell         while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
516e0c1b49fSNick Terrell             HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
517e0c1b49fSNick Terrell 
518e0c1b49fSNick Terrell     /* no more data to retrieve from bitstream, no need to reload */
519e0c1b49fSNick Terrell     while (p < pEnd)
520e0c1b49fSNick Terrell         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
521e0c1b49fSNick Terrell 
522e0c1b49fSNick Terrell     return pEnd-pStart;
523e0c1b49fSNick Terrell }
524e0c1b49fSNick Terrell 
525e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t
526e0c1b49fSNick Terrell HUF_decompress1X1_usingDTable_internal_body(
527e0c1b49fSNick Terrell           void* dst,  size_t dstSize,
528e0c1b49fSNick Terrell     const void* cSrc, size_t cSrcSize,
529e0c1b49fSNick Terrell     const HUF_DTable* DTable)
530e0c1b49fSNick Terrell {
531e0c1b49fSNick Terrell     BYTE* op = (BYTE*)dst;
532e0c1b49fSNick Terrell     BYTE* const oend = op + dstSize;
533e0c1b49fSNick Terrell     const void* dtPtr = DTable + 1;
534e0c1b49fSNick Terrell     const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
535e0c1b49fSNick Terrell     BIT_DStream_t bitD;
536e0c1b49fSNick Terrell     DTableDesc const dtd = HUF_getDTableDesc(DTable);
537e0c1b49fSNick Terrell     U32 const dtLog = dtd.tableLog;
538e0c1b49fSNick Terrell 
539e0c1b49fSNick Terrell     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
540e0c1b49fSNick Terrell 
541e0c1b49fSNick Terrell     HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
542e0c1b49fSNick Terrell 
543e0c1b49fSNick Terrell     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
544e0c1b49fSNick Terrell 
545e0c1b49fSNick Terrell     return dstSize;
546e0c1b49fSNick Terrell }
547e0c1b49fSNick Terrell 
548e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t
549e0c1b49fSNick Terrell HUF_decompress4X1_usingDTable_internal_body(
550e0c1b49fSNick Terrell           void* dst,  size_t dstSize,
551e0c1b49fSNick Terrell     const void* cSrc, size_t cSrcSize,
552e0c1b49fSNick Terrell     const HUF_DTable* DTable)
553e0c1b49fSNick Terrell {
554e0c1b49fSNick Terrell     /* Check */
555e0c1b49fSNick Terrell     if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
556e0c1b49fSNick Terrell 
557e0c1b49fSNick Terrell     {   const BYTE* const istart = (const BYTE*) cSrc;
558e0c1b49fSNick Terrell         BYTE* const ostart = (BYTE*) dst;
559e0c1b49fSNick Terrell         BYTE* const oend = ostart + dstSize;
560e0c1b49fSNick Terrell         BYTE* const olimit = oend - 3;
561e0c1b49fSNick Terrell         const void* const dtPtr = DTable + 1;
562e0c1b49fSNick Terrell         const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
563e0c1b49fSNick Terrell 
564e0c1b49fSNick Terrell         /* Init */
565e0c1b49fSNick Terrell         BIT_DStream_t bitD1;
566e0c1b49fSNick Terrell         BIT_DStream_t bitD2;
567e0c1b49fSNick Terrell         BIT_DStream_t bitD3;
568e0c1b49fSNick Terrell         BIT_DStream_t bitD4;
569e0c1b49fSNick Terrell         size_t const length1 = MEM_readLE16(istart);
570e0c1b49fSNick Terrell         size_t const length2 = MEM_readLE16(istart+2);
571e0c1b49fSNick Terrell         size_t const length3 = MEM_readLE16(istart+4);
572e0c1b49fSNick Terrell         size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
573e0c1b49fSNick Terrell         const BYTE* const istart1 = istart + 6;  /* jumpTable */
574e0c1b49fSNick Terrell         const BYTE* const istart2 = istart1 + length1;
575e0c1b49fSNick Terrell         const BYTE* const istart3 = istart2 + length2;
576e0c1b49fSNick Terrell         const BYTE* const istart4 = istart3 + length3;
577e0c1b49fSNick Terrell         const size_t segmentSize = (dstSize+3) / 4;
578e0c1b49fSNick Terrell         BYTE* const opStart2 = ostart + segmentSize;
579e0c1b49fSNick Terrell         BYTE* const opStart3 = opStart2 + segmentSize;
580e0c1b49fSNick Terrell         BYTE* const opStart4 = opStart3 + segmentSize;
581e0c1b49fSNick Terrell         BYTE* op1 = ostart;
582e0c1b49fSNick Terrell         BYTE* op2 = opStart2;
583e0c1b49fSNick Terrell         BYTE* op3 = opStart3;
584e0c1b49fSNick Terrell         BYTE* op4 = opStart4;
585e0c1b49fSNick Terrell         DTableDesc const dtd = HUF_getDTableDesc(DTable);
586e0c1b49fSNick Terrell         U32 const dtLog = dtd.tableLog;
587e0c1b49fSNick Terrell         U32 endSignal = 1;
588e0c1b49fSNick Terrell 
589e0c1b49fSNick Terrell         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
590*2aa14b1aSNick Terrell         if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
591e0c1b49fSNick Terrell         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
592e0c1b49fSNick Terrell         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
593e0c1b49fSNick Terrell         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
594e0c1b49fSNick Terrell         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
595e0c1b49fSNick Terrell 
596e0c1b49fSNick Terrell         /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
597*2aa14b1aSNick Terrell         if ((size_t)(oend - op4) >= sizeof(size_t)) {
598e0c1b49fSNick Terrell             for ( ; (endSignal) & (op4 < olimit) ; ) {
599e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
600e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
601e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
602e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
603e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
604e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
605e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
606e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
607e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
608e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
609e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
610e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
611e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
612e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
613e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
614e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
615e0c1b49fSNick Terrell                 endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
616e0c1b49fSNick Terrell                 endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
617e0c1b49fSNick Terrell                 endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
618e0c1b49fSNick Terrell                 endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
619e0c1b49fSNick Terrell             }
620*2aa14b1aSNick Terrell         }
621e0c1b49fSNick Terrell 
622e0c1b49fSNick Terrell         /* check corruption */
623e0c1b49fSNick Terrell         /* note : should not be necessary : op# advance in lock step, and we control op4.
624e0c1b49fSNick Terrell          *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
625e0c1b49fSNick Terrell         if (op1 > opStart2) return ERROR(corruption_detected);
626e0c1b49fSNick Terrell         if (op2 > opStart3) return ERROR(corruption_detected);
627e0c1b49fSNick Terrell         if (op3 > opStart4) return ERROR(corruption_detected);
628e0c1b49fSNick Terrell         /* note : op4 supposed already verified within main loop */
629e0c1b49fSNick Terrell 
630e0c1b49fSNick Terrell         /* finish bitStreams one by one */
631e0c1b49fSNick Terrell         HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
632e0c1b49fSNick Terrell         HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
633e0c1b49fSNick Terrell         HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
634e0c1b49fSNick Terrell         HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
635e0c1b49fSNick Terrell 
636e0c1b49fSNick Terrell         /* check */
637e0c1b49fSNick Terrell         { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
638e0c1b49fSNick Terrell           if (!endCheck) return ERROR(corruption_detected); }
639e0c1b49fSNick Terrell 
640e0c1b49fSNick Terrell         /* decoded size */
641e0c1b49fSNick Terrell         return dstSize;
642e0c1b49fSNick Terrell     }
643e0c1b49fSNick Terrell }
644e0c1b49fSNick Terrell 
645*2aa14b1aSNick Terrell #if HUF_NEED_BMI2_FUNCTION
646*2aa14b1aSNick Terrell static BMI2_TARGET_ATTRIBUTE
647*2aa14b1aSNick Terrell size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
648*2aa14b1aSNick Terrell                     size_t cSrcSize, HUF_DTable const* DTable) {
649*2aa14b1aSNick Terrell     return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
650*2aa14b1aSNick Terrell }
651*2aa14b1aSNick Terrell #endif
652*2aa14b1aSNick Terrell 
653*2aa14b1aSNick Terrell #if HUF_NEED_DEFAULT_FUNCTION
654*2aa14b1aSNick Terrell static
655*2aa14b1aSNick Terrell size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
656*2aa14b1aSNick Terrell                     size_t cSrcSize, HUF_DTable const* DTable) {
657*2aa14b1aSNick Terrell     return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
658*2aa14b1aSNick Terrell }
659*2aa14b1aSNick Terrell #endif
660*2aa14b1aSNick Terrell 
661*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2
662*2aa14b1aSNick Terrell 
663*2aa14b1aSNick Terrell HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
664*2aa14b1aSNick Terrell 
665*2aa14b1aSNick Terrell static HUF_ASM_X86_64_BMI2_ATTRS
666*2aa14b1aSNick Terrell size_t
667*2aa14b1aSNick Terrell HUF_decompress4X1_usingDTable_internal_bmi2_asm(
668*2aa14b1aSNick Terrell           void* dst,  size_t dstSize,
669*2aa14b1aSNick Terrell     const void* cSrc, size_t cSrcSize,
670*2aa14b1aSNick Terrell     const HUF_DTable* DTable)
671*2aa14b1aSNick Terrell {
672*2aa14b1aSNick Terrell     void const* dt = DTable + 1;
673*2aa14b1aSNick Terrell     const BYTE* const iend = (const BYTE*)cSrc + 6;
674*2aa14b1aSNick Terrell     BYTE* const oend = (BYTE*)dst + dstSize;
675*2aa14b1aSNick Terrell     HUF_DecompressAsmArgs args;
676*2aa14b1aSNick Terrell     {
677*2aa14b1aSNick Terrell         size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
678*2aa14b1aSNick Terrell         FORWARD_IF_ERROR(ret, "Failed to init asm args");
679*2aa14b1aSNick Terrell         if (ret != 0)
680*2aa14b1aSNick Terrell             return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
681*2aa14b1aSNick Terrell     }
682*2aa14b1aSNick Terrell 
683*2aa14b1aSNick Terrell     assert(args.ip[0] >= args.ilimit);
684*2aa14b1aSNick Terrell     HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);
685*2aa14b1aSNick Terrell 
686*2aa14b1aSNick Terrell     /* Our loop guarantees that ip[] >= ilimit and that we haven't
687*2aa14b1aSNick Terrell     * overwritten any op[].
688*2aa14b1aSNick Terrell     */
689*2aa14b1aSNick Terrell     assert(args.ip[0] >= iend);
690*2aa14b1aSNick Terrell     assert(args.ip[1] >= iend);
691*2aa14b1aSNick Terrell     assert(args.ip[2] >= iend);
692*2aa14b1aSNick Terrell     assert(args.ip[3] >= iend);
693*2aa14b1aSNick Terrell     assert(args.op[3] <= oend);
694*2aa14b1aSNick Terrell     (void)iend;
695*2aa14b1aSNick Terrell 
696*2aa14b1aSNick Terrell     /* finish bit streams one by one. */
697*2aa14b1aSNick Terrell     {
698*2aa14b1aSNick Terrell         size_t const segmentSize = (dstSize+3) / 4;
699*2aa14b1aSNick Terrell         BYTE* segmentEnd = (BYTE*)dst;
700*2aa14b1aSNick Terrell         int i;
701*2aa14b1aSNick Terrell         for (i = 0; i < 4; ++i) {
702*2aa14b1aSNick Terrell             BIT_DStream_t bit;
703*2aa14b1aSNick Terrell             if (segmentSize <= (size_t)(oend - segmentEnd))
704*2aa14b1aSNick Terrell                 segmentEnd += segmentSize;
705*2aa14b1aSNick Terrell             else
706*2aa14b1aSNick Terrell                 segmentEnd = oend;
707*2aa14b1aSNick Terrell             FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
708*2aa14b1aSNick Terrell             /* Decompress and validate that we've produced exactly the expected length. */
709*2aa14b1aSNick Terrell             args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
710*2aa14b1aSNick Terrell             if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
711*2aa14b1aSNick Terrell         }
712*2aa14b1aSNick Terrell     }
713*2aa14b1aSNick Terrell 
714*2aa14b1aSNick Terrell     /* decoded size */
715*2aa14b1aSNick Terrell     return dstSize;
716*2aa14b1aSNick Terrell }
717*2aa14b1aSNick Terrell #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
718e0c1b49fSNick Terrell 
719e0c1b49fSNick Terrell typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
720e0c1b49fSNick Terrell                                                const void *cSrc,
721e0c1b49fSNick Terrell                                                size_t cSrcSize,
722e0c1b49fSNick Terrell                                                const HUF_DTable *DTable);
723e0c1b49fSNick Terrell 
724e0c1b49fSNick Terrell HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
725e0c1b49fSNick Terrell 
726*2aa14b1aSNick Terrell static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
727*2aa14b1aSNick Terrell                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
728*2aa14b1aSNick Terrell {
729*2aa14b1aSNick Terrell #if DYNAMIC_BMI2
730*2aa14b1aSNick Terrell     if (bmi2) {
731*2aa14b1aSNick Terrell # if ZSTD_ENABLE_ASM_X86_64_BMI2
732*2aa14b1aSNick Terrell         return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
733*2aa14b1aSNick Terrell # else
734*2aa14b1aSNick Terrell         return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
735*2aa14b1aSNick Terrell # endif
736*2aa14b1aSNick Terrell     }
737*2aa14b1aSNick Terrell #else
738*2aa14b1aSNick Terrell     (void)bmi2;
739*2aa14b1aSNick Terrell #endif
740*2aa14b1aSNick Terrell 
741*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
742*2aa14b1aSNick Terrell     return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
743*2aa14b1aSNick Terrell #else
744*2aa14b1aSNick Terrell     return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
745*2aa14b1aSNick Terrell #endif
746*2aa14b1aSNick Terrell }
747e0c1b49fSNick Terrell 
748e0c1b49fSNick Terrell 
749e0c1b49fSNick Terrell size_t HUF_decompress1X1_usingDTable(
750e0c1b49fSNick Terrell           void* dst,  size_t dstSize,
751e0c1b49fSNick Terrell     const void* cSrc, size_t cSrcSize,
752e0c1b49fSNick Terrell     const HUF_DTable* DTable)
753e0c1b49fSNick Terrell {
754e0c1b49fSNick Terrell     DTableDesc dtd = HUF_getDTableDesc(DTable);
755e0c1b49fSNick Terrell     if (dtd.tableType != 0) return ERROR(GENERIC);
756e0c1b49fSNick Terrell     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
757e0c1b49fSNick Terrell }
758e0c1b49fSNick Terrell 
759e0c1b49fSNick Terrell size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
760e0c1b49fSNick Terrell                                    const void* cSrc, size_t cSrcSize,
761e0c1b49fSNick Terrell                                    void* workSpace, size_t wkspSize)
762e0c1b49fSNick Terrell {
763e0c1b49fSNick Terrell     const BYTE* ip = (const BYTE*) cSrc;
764e0c1b49fSNick Terrell 
765e0c1b49fSNick Terrell     size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
766e0c1b49fSNick Terrell     if (HUF_isError(hSize)) return hSize;
767e0c1b49fSNick Terrell     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
768e0c1b49fSNick Terrell     ip += hSize; cSrcSize -= hSize;
769e0c1b49fSNick Terrell 
770e0c1b49fSNick Terrell     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
771e0c1b49fSNick Terrell }
772e0c1b49fSNick Terrell 
773e0c1b49fSNick Terrell 
774e0c1b49fSNick Terrell size_t HUF_decompress4X1_usingDTable(
775e0c1b49fSNick Terrell           void* dst,  size_t dstSize,
776e0c1b49fSNick Terrell     const void* cSrc, size_t cSrcSize,
777e0c1b49fSNick Terrell     const HUF_DTable* DTable)
778e0c1b49fSNick Terrell {
779e0c1b49fSNick Terrell     DTableDesc dtd = HUF_getDTableDesc(DTable);
780e0c1b49fSNick Terrell     if (dtd.tableType != 0) return ERROR(GENERIC);
781e0c1b49fSNick Terrell     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
782e0c1b49fSNick Terrell }
783e0c1b49fSNick Terrell 
784e0c1b49fSNick Terrell static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
785e0c1b49fSNick Terrell                                    const void* cSrc, size_t cSrcSize,
786e0c1b49fSNick Terrell                                    void* workSpace, size_t wkspSize, int bmi2)
787e0c1b49fSNick Terrell {
788e0c1b49fSNick Terrell     const BYTE* ip = (const BYTE*) cSrc;
789e0c1b49fSNick Terrell 
790e0c1b49fSNick Terrell     size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
791e0c1b49fSNick Terrell     if (HUF_isError(hSize)) return hSize;
792e0c1b49fSNick Terrell     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
793e0c1b49fSNick Terrell     ip += hSize; cSrcSize -= hSize;
794e0c1b49fSNick Terrell 
795e0c1b49fSNick Terrell     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
796e0c1b49fSNick Terrell }
797e0c1b49fSNick Terrell 
798e0c1b49fSNick Terrell size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
799e0c1b49fSNick Terrell                                    const void* cSrc, size_t cSrcSize,
800e0c1b49fSNick Terrell                                    void* workSpace, size_t wkspSize)
801e0c1b49fSNick Terrell {
802e0c1b49fSNick Terrell     return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
803e0c1b49fSNick Terrell }
804e0c1b49fSNick Terrell 
805e0c1b49fSNick Terrell 
806e0c1b49fSNick Terrell #endif /* HUF_FORCE_DECOMPRESS_X2 */
807e0c1b49fSNick Terrell 
808e0c1b49fSNick Terrell 
809e0c1b49fSNick Terrell #ifndef HUF_FORCE_DECOMPRESS_X1
810e0c1b49fSNick Terrell 
811e0c1b49fSNick Terrell /* *************************/
812e0c1b49fSNick Terrell /* double-symbols decoding */
813e0c1b49fSNick Terrell /* *************************/
814e0c1b49fSNick Terrell 
815e0c1b49fSNick Terrell typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
816*2aa14b1aSNick Terrell typedef struct { BYTE symbol; } sortedSymbol_t;
817e0c1b49fSNick Terrell typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
818e0c1b49fSNick Terrell typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
819e0c1b49fSNick Terrell 
820*2aa14b1aSNick Terrell /*
821*2aa14b1aSNick Terrell  * Constructs a HUF_DEltX2 in a U32.
822*2aa14b1aSNick Terrell  */
823*2aa14b1aSNick Terrell static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
824*2aa14b1aSNick Terrell {
825*2aa14b1aSNick Terrell     U32 seq;
826*2aa14b1aSNick Terrell     DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
827*2aa14b1aSNick Terrell     DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
828*2aa14b1aSNick Terrell     DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
829*2aa14b1aSNick Terrell     DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
830*2aa14b1aSNick Terrell     if (MEM_isLittleEndian()) {
831*2aa14b1aSNick Terrell         seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
832*2aa14b1aSNick Terrell         return seq + (nbBits << 16) + ((U32)level << 24);
833*2aa14b1aSNick Terrell     } else {
834*2aa14b1aSNick Terrell         seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
835*2aa14b1aSNick Terrell         return (seq << 16) + (nbBits << 8) + (U32)level;
836*2aa14b1aSNick Terrell     }
837*2aa14b1aSNick Terrell }
838*2aa14b1aSNick Terrell 
839*2aa14b1aSNick Terrell /*
840*2aa14b1aSNick Terrell  * Constructs a HUF_DEltX2.
841*2aa14b1aSNick Terrell  */
842*2aa14b1aSNick Terrell static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
843*2aa14b1aSNick Terrell {
844*2aa14b1aSNick Terrell     HUF_DEltX2 DElt;
845*2aa14b1aSNick Terrell     U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
846*2aa14b1aSNick Terrell     DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
847*2aa14b1aSNick Terrell     ZSTD_memcpy(&DElt, &val, sizeof(val));
848*2aa14b1aSNick Terrell     return DElt;
849*2aa14b1aSNick Terrell }
850*2aa14b1aSNick Terrell 
851*2aa14b1aSNick Terrell /*
852*2aa14b1aSNick Terrell  * Constructs 2 HUF_DEltX2s and packs them into a U64.
853*2aa14b1aSNick Terrell  */
854*2aa14b1aSNick Terrell static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
855*2aa14b1aSNick Terrell {
856*2aa14b1aSNick Terrell     U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
857*2aa14b1aSNick Terrell     return (U64)DElt + ((U64)DElt << 32);
858*2aa14b1aSNick Terrell }
859*2aa14b1aSNick Terrell 
860*2aa14b1aSNick Terrell /*
861*2aa14b1aSNick Terrell  * Fills the DTable rank with all the symbols from [begin, end) that are each
862*2aa14b1aSNick Terrell  * nbBits long.
863*2aa14b1aSNick Terrell  *
864*2aa14b1aSNick Terrell  * @param DTableRank The start of the rank in the DTable.
865*2aa14b1aSNick Terrell  * @param begin The first symbol to fill (inclusive).
866*2aa14b1aSNick Terrell  * @param end The last symbol to fill (exclusive).
867*2aa14b1aSNick Terrell  * @param nbBits Each symbol is nbBits long.
868*2aa14b1aSNick Terrell  * @param tableLog The table log.
869*2aa14b1aSNick Terrell  * @param baseSeq If level == 1 { 0 } else { the first level symbol }
870*2aa14b1aSNick Terrell  * @param level The level in the table. Must be 1 or 2.
871*2aa14b1aSNick Terrell  */
872*2aa14b1aSNick Terrell static void HUF_fillDTableX2ForWeight(
873*2aa14b1aSNick Terrell     HUF_DEltX2* DTableRank,
874*2aa14b1aSNick Terrell     sortedSymbol_t const* begin, sortedSymbol_t const* end,
875*2aa14b1aSNick Terrell     U32 nbBits, U32 tableLog,
876*2aa14b1aSNick Terrell     U16 baseSeq, int const level)
877*2aa14b1aSNick Terrell {
878*2aa14b1aSNick Terrell     U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
879*2aa14b1aSNick Terrell     const sortedSymbol_t* ptr;
880*2aa14b1aSNick Terrell     assert(level >= 1 && level <= 2);
881*2aa14b1aSNick Terrell     switch (length) {
882*2aa14b1aSNick Terrell     case 1:
883*2aa14b1aSNick Terrell         for (ptr = begin; ptr != end; ++ptr) {
884*2aa14b1aSNick Terrell             HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
885*2aa14b1aSNick Terrell             *DTableRank++ = DElt;
886*2aa14b1aSNick Terrell         }
887*2aa14b1aSNick Terrell         break;
888*2aa14b1aSNick Terrell     case 2:
889*2aa14b1aSNick Terrell         for (ptr = begin; ptr != end; ++ptr) {
890*2aa14b1aSNick Terrell             HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
891*2aa14b1aSNick Terrell             DTableRank[0] = DElt;
892*2aa14b1aSNick Terrell             DTableRank[1] = DElt;
893*2aa14b1aSNick Terrell             DTableRank += 2;
894*2aa14b1aSNick Terrell         }
895*2aa14b1aSNick Terrell         break;
896*2aa14b1aSNick Terrell     case 4:
897*2aa14b1aSNick Terrell         for (ptr = begin; ptr != end; ++ptr) {
898*2aa14b1aSNick Terrell             U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
899*2aa14b1aSNick Terrell             ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
900*2aa14b1aSNick Terrell             ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
901*2aa14b1aSNick Terrell             DTableRank += 4;
902*2aa14b1aSNick Terrell         }
903*2aa14b1aSNick Terrell         break;
904*2aa14b1aSNick Terrell     case 8:
905*2aa14b1aSNick Terrell         for (ptr = begin; ptr != end; ++ptr) {
906*2aa14b1aSNick Terrell             U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
907*2aa14b1aSNick Terrell             ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
908*2aa14b1aSNick Terrell             ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
909*2aa14b1aSNick Terrell             ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
910*2aa14b1aSNick Terrell             ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
911*2aa14b1aSNick Terrell             DTableRank += 8;
912*2aa14b1aSNick Terrell         }
913*2aa14b1aSNick Terrell         break;
914*2aa14b1aSNick Terrell     default:
915*2aa14b1aSNick Terrell         for (ptr = begin; ptr != end; ++ptr) {
916*2aa14b1aSNick Terrell             U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
917*2aa14b1aSNick Terrell             HUF_DEltX2* const DTableRankEnd = DTableRank + length;
918*2aa14b1aSNick Terrell             for (; DTableRank != DTableRankEnd; DTableRank += 8) {
919*2aa14b1aSNick Terrell                 ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
920*2aa14b1aSNick Terrell                 ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
921*2aa14b1aSNick Terrell                 ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
922*2aa14b1aSNick Terrell                 ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
923*2aa14b1aSNick Terrell             }
924*2aa14b1aSNick Terrell         }
925*2aa14b1aSNick Terrell         break;
926*2aa14b1aSNick Terrell     }
927*2aa14b1aSNick Terrell }
928e0c1b49fSNick Terrell 
929e0c1b49fSNick Terrell /* HUF_fillDTableX2Level2() :
930e0c1b49fSNick Terrell  * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
931*2aa14b1aSNick Terrell static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
932*2aa14b1aSNick Terrell                            const U32* rankVal, const int minWeight, const int maxWeight1,
933*2aa14b1aSNick Terrell                            const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
934*2aa14b1aSNick Terrell                            U32 nbBitsBaseline, U16 baseSeq)
935e0c1b49fSNick Terrell {
936*2aa14b1aSNick Terrell     /* Fill skipped values (all positions up to rankVal[minWeight]).
937*2aa14b1aSNick Terrell      * These are positions only get a single symbol because the combined weight
938*2aa14b1aSNick Terrell      * is too large.
939*2aa14b1aSNick Terrell      */
940e0c1b49fSNick Terrell     if (minWeight>1) {
941*2aa14b1aSNick Terrell         U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
942*2aa14b1aSNick Terrell         U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
943*2aa14b1aSNick Terrell         int const skipSize = rankVal[minWeight];
944*2aa14b1aSNick Terrell         assert(length > 1);
945*2aa14b1aSNick Terrell         assert((U32)skipSize < length);
946*2aa14b1aSNick Terrell         switch (length) {
947*2aa14b1aSNick Terrell         case 2:
948*2aa14b1aSNick Terrell             assert(skipSize == 1);
949*2aa14b1aSNick Terrell             ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
950*2aa14b1aSNick Terrell             break;
951*2aa14b1aSNick Terrell         case 4:
952*2aa14b1aSNick Terrell             assert(skipSize <= 4);
953*2aa14b1aSNick Terrell             ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
954*2aa14b1aSNick Terrell             ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
955*2aa14b1aSNick Terrell             break;
956*2aa14b1aSNick Terrell         default:
957*2aa14b1aSNick Terrell             {
958*2aa14b1aSNick Terrell                 int i;
959*2aa14b1aSNick Terrell                 for (i = 0; i < skipSize; i += 8) {
960*2aa14b1aSNick Terrell                     ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
961*2aa14b1aSNick Terrell                     ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
962*2aa14b1aSNick Terrell                     ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
963*2aa14b1aSNick Terrell                     ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
964*2aa14b1aSNick Terrell                 }
965*2aa14b1aSNick Terrell             }
966*2aa14b1aSNick Terrell         }
967e0c1b49fSNick Terrell     }
968e0c1b49fSNick Terrell 
969*2aa14b1aSNick Terrell     /* Fill each of the second level symbols by weight. */
970*2aa14b1aSNick Terrell     {
971*2aa14b1aSNick Terrell         int w;
972*2aa14b1aSNick Terrell         for (w = minWeight; w < maxWeight1; ++w) {
973*2aa14b1aSNick Terrell             int const begin = rankStart[w];
974*2aa14b1aSNick Terrell             int const end = rankStart[w+1];
975*2aa14b1aSNick Terrell             U32 const nbBits = nbBitsBaseline - w;
976*2aa14b1aSNick Terrell             U32 const totalBits = nbBits + consumedBits;
977*2aa14b1aSNick Terrell             HUF_fillDTableX2ForWeight(
978*2aa14b1aSNick Terrell                 DTable + rankVal[w],
979*2aa14b1aSNick Terrell                 sortedSymbols + begin, sortedSymbols + end,
980*2aa14b1aSNick Terrell                 totalBits, targetLog,
981*2aa14b1aSNick Terrell                 baseSeq, /* level */ 2);
982e0c1b49fSNick Terrell         }
983*2aa14b1aSNick Terrell     }
984*2aa14b1aSNick Terrell }
985e0c1b49fSNick Terrell 
986e0c1b49fSNick Terrell static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
987*2aa14b1aSNick Terrell                            const sortedSymbol_t* sortedList,
988e0c1b49fSNick Terrell                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
989*2aa14b1aSNick Terrell                            const U32 nbBitsBaseline)
990e0c1b49fSNick Terrell {
991*2aa14b1aSNick Terrell     U32* const rankVal = rankValOrigin[0];
992e0c1b49fSNick Terrell     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
993e0c1b49fSNick Terrell     const U32 minBits  = nbBitsBaseline - maxWeight;
994*2aa14b1aSNick Terrell     int w;
995*2aa14b1aSNick Terrell     int const wEnd = (int)maxWeight + 1;
996e0c1b49fSNick Terrell 
997*2aa14b1aSNick Terrell     /* Fill DTable in order of weight. */
998*2aa14b1aSNick Terrell     for (w = 1; w < wEnd; ++w) {
999*2aa14b1aSNick Terrell         int const begin = (int)rankStart[w];
1000*2aa14b1aSNick Terrell         int const end = (int)rankStart[w+1];
1001*2aa14b1aSNick Terrell         U32 const nbBits = nbBitsBaseline - w;
1002e0c1b49fSNick Terrell 
1003*2aa14b1aSNick Terrell         if (targetLog-nbBits >= minBits) {
1004*2aa14b1aSNick Terrell             /* Enough room for a second symbol. */
1005*2aa14b1aSNick Terrell             int start = rankVal[w];
1006*2aa14b1aSNick Terrell             U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
1007e0c1b49fSNick Terrell             int minWeight = nbBits + scaleLog;
1008*2aa14b1aSNick Terrell             int s;
1009e0c1b49fSNick Terrell             if (minWeight < 1) minWeight = 1;
1010*2aa14b1aSNick Terrell             /* Fill the DTable for every symbol of weight w.
1011*2aa14b1aSNick Terrell              * These symbols get at least 1 second symbol.
1012*2aa14b1aSNick Terrell              */
1013*2aa14b1aSNick Terrell             for (s = begin; s != end; ++s) {
1014*2aa14b1aSNick Terrell                 HUF_fillDTableX2Level2(
1015*2aa14b1aSNick Terrell                     DTable + start, targetLog, nbBits,
1016*2aa14b1aSNick Terrell                     rankValOrigin[nbBits], minWeight, wEnd,
1017*2aa14b1aSNick Terrell                     sortedList, rankStart,
1018*2aa14b1aSNick Terrell                     nbBitsBaseline, sortedList[s].symbol);
1019*2aa14b1aSNick Terrell                 start += length;
1020*2aa14b1aSNick Terrell             }
1021e0c1b49fSNick Terrell         } else {
1022*2aa14b1aSNick Terrell             /* Only a single symbol. */
1023*2aa14b1aSNick Terrell             HUF_fillDTableX2ForWeight(
1024*2aa14b1aSNick Terrell                 DTable + rankVal[w],
1025*2aa14b1aSNick Terrell                 sortedList + begin, sortedList + end,
1026*2aa14b1aSNick Terrell                 nbBits, targetLog,
1027*2aa14b1aSNick Terrell                 /* baseSeq */ 0, /* level */ 1);
1028*2aa14b1aSNick Terrell         }
1029e0c1b49fSNick Terrell     }
1030e0c1b49fSNick Terrell }
1031e0c1b49fSNick Terrell 
1032e0c1b49fSNick Terrell typedef struct {
1033e0c1b49fSNick Terrell     rankValCol_t rankVal[HUF_TABLELOG_MAX];
1034e0c1b49fSNick Terrell     U32 rankStats[HUF_TABLELOG_MAX + 1];
1035*2aa14b1aSNick Terrell     U32 rankStart0[HUF_TABLELOG_MAX + 3];
1036e0c1b49fSNick Terrell     sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
1037e0c1b49fSNick Terrell     BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
1038e0c1b49fSNick Terrell     U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
1039e0c1b49fSNick Terrell } HUF_ReadDTableX2_Workspace;
1040e0c1b49fSNick Terrell 
1041e0c1b49fSNick Terrell size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
1042e0c1b49fSNick Terrell                        const void* src, size_t srcSize,
1043e0c1b49fSNick Terrell                              void* workSpace, size_t wkspSize)
1044e0c1b49fSNick Terrell {
1045*2aa14b1aSNick Terrell     return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
1046*2aa14b1aSNick Terrell }
1047*2aa14b1aSNick Terrell 
1048*2aa14b1aSNick Terrell size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
1049*2aa14b1aSNick Terrell                        const void* src, size_t srcSize,
1050*2aa14b1aSNick Terrell                              void* workSpace, size_t wkspSize, int bmi2)
1051*2aa14b1aSNick Terrell {
1052*2aa14b1aSNick Terrell     U32 tableLog, maxW, nbSymbols;
1053e0c1b49fSNick Terrell     DTableDesc dtd = HUF_getDTableDesc(DTable);
1054*2aa14b1aSNick Terrell     U32 maxTableLog = dtd.maxTableLog;
1055e0c1b49fSNick Terrell     size_t iSize;
1056e0c1b49fSNick Terrell     void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
1057e0c1b49fSNick Terrell     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
1058e0c1b49fSNick Terrell     U32 *rankStart;
1059e0c1b49fSNick Terrell 
1060e0c1b49fSNick Terrell     HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
1061e0c1b49fSNick Terrell 
1062e0c1b49fSNick Terrell     if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
1063e0c1b49fSNick Terrell 
1064e0c1b49fSNick Terrell     rankStart = wksp->rankStart0 + 1;
1065e0c1b49fSNick Terrell     ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
1066e0c1b49fSNick Terrell     ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
1067e0c1b49fSNick Terrell 
1068e0c1b49fSNick Terrell     DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
1069e0c1b49fSNick Terrell     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
1070e0c1b49fSNick Terrell     /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
1071e0c1b49fSNick Terrell 
1072*2aa14b1aSNick Terrell     iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
1073e0c1b49fSNick Terrell     if (HUF_isError(iSize)) return iSize;
1074e0c1b49fSNick Terrell 
1075e0c1b49fSNick Terrell     /* check result */
1076e0c1b49fSNick Terrell     if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
1077*2aa14b1aSNick Terrell     if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
1078e0c1b49fSNick Terrell 
1079e0c1b49fSNick Terrell     /* find maxWeight */
1080e0c1b49fSNick Terrell     for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
1081e0c1b49fSNick Terrell 
1082e0c1b49fSNick Terrell     /* Get start index of each weight */
1083e0c1b49fSNick Terrell     {   U32 w, nextRankStart = 0;
1084e0c1b49fSNick Terrell         for (w=1; w<maxW+1; w++) {
1085e0c1b49fSNick Terrell             U32 curr = nextRankStart;
1086e0c1b49fSNick Terrell             nextRankStart += wksp->rankStats[w];
1087e0c1b49fSNick Terrell             rankStart[w] = curr;
1088e0c1b49fSNick Terrell         }
1089e0c1b49fSNick Terrell         rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
1090*2aa14b1aSNick Terrell         rankStart[maxW+1] = nextRankStart;
1091e0c1b49fSNick Terrell     }
1092e0c1b49fSNick Terrell 
1093e0c1b49fSNick Terrell     /* sort symbols by weight */
1094e0c1b49fSNick Terrell     {   U32 s;
1095e0c1b49fSNick Terrell         for (s=0; s<nbSymbols; s++) {
1096e0c1b49fSNick Terrell             U32 const w = wksp->weightList[s];
1097e0c1b49fSNick Terrell             U32 const r = rankStart[w]++;
1098e0c1b49fSNick Terrell             wksp->sortedSymbol[r].symbol = (BYTE)s;
1099e0c1b49fSNick Terrell         }
1100e0c1b49fSNick Terrell         rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
1101e0c1b49fSNick Terrell     }
1102e0c1b49fSNick Terrell 
1103e0c1b49fSNick Terrell     /* Build rankVal */
1104e0c1b49fSNick Terrell     {   U32* const rankVal0 = wksp->rankVal[0];
1105e0c1b49fSNick Terrell         {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
1106e0c1b49fSNick Terrell             U32 nextRankVal = 0;
1107e0c1b49fSNick Terrell             U32 w;
1108e0c1b49fSNick Terrell             for (w=1; w<maxW+1; w++) {
1109e0c1b49fSNick Terrell                 U32 curr = nextRankVal;
1110e0c1b49fSNick Terrell                 nextRankVal += wksp->rankStats[w] << (w+rescale);
1111e0c1b49fSNick Terrell                 rankVal0[w] = curr;
1112e0c1b49fSNick Terrell         }   }
1113e0c1b49fSNick Terrell         {   U32 const minBits = tableLog+1 - maxW;
1114e0c1b49fSNick Terrell             U32 consumed;
1115e0c1b49fSNick Terrell             for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
1116e0c1b49fSNick Terrell                 U32* const rankValPtr = wksp->rankVal[consumed];
1117e0c1b49fSNick Terrell                 U32 w;
1118e0c1b49fSNick Terrell                 for (w = 1; w < maxW+1; w++) {
1119e0c1b49fSNick Terrell                     rankValPtr[w] = rankVal0[w] >> consumed;
1120e0c1b49fSNick Terrell     }   }   }   }
1121e0c1b49fSNick Terrell 
1122e0c1b49fSNick Terrell     HUF_fillDTableX2(dt, maxTableLog,
1123*2aa14b1aSNick Terrell                    wksp->sortedSymbol,
1124e0c1b49fSNick Terrell                    wksp->rankStart0, wksp->rankVal, maxW,
1125*2aa14b1aSNick Terrell                    tableLog+1);
1126e0c1b49fSNick Terrell 
1127e0c1b49fSNick Terrell     dtd.tableLog = (BYTE)maxTableLog;
1128e0c1b49fSNick Terrell     dtd.tableType = 1;
1129e0c1b49fSNick Terrell     ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
1130e0c1b49fSNick Terrell     return iSize;
1131e0c1b49fSNick Terrell }
1132e0c1b49fSNick Terrell 
1133e0c1b49fSNick Terrell 
1134e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE U32
1135e0c1b49fSNick Terrell HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
1136e0c1b49fSNick Terrell {
1137e0c1b49fSNick Terrell     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
1138*2aa14b1aSNick Terrell     ZSTD_memcpy(op, &dt[val].sequence, 2);
1139e0c1b49fSNick Terrell     BIT_skipBits(DStream, dt[val].nbBits);
1140e0c1b49fSNick Terrell     return dt[val].length;
1141e0c1b49fSNick Terrell }
1142e0c1b49fSNick Terrell 
1143e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE U32
1144e0c1b49fSNick Terrell HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
1145e0c1b49fSNick Terrell {
1146e0c1b49fSNick Terrell     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
1147*2aa14b1aSNick Terrell     ZSTD_memcpy(op, &dt[val].sequence, 1);
1148*2aa14b1aSNick Terrell     if (dt[val].length==1) {
1149*2aa14b1aSNick Terrell         BIT_skipBits(DStream, dt[val].nbBits);
1150*2aa14b1aSNick Terrell     } else {
1151e0c1b49fSNick Terrell         if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
1152e0c1b49fSNick Terrell             BIT_skipBits(DStream, dt[val].nbBits);
1153e0c1b49fSNick Terrell             if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
1154e0c1b49fSNick Terrell                 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
1155e0c1b49fSNick Terrell                 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
1156*2aa14b1aSNick Terrell         }
1157*2aa14b1aSNick Terrell     }
1158e0c1b49fSNick Terrell     return 1;
1159e0c1b49fSNick Terrell }
1160e0c1b49fSNick Terrell 
1161e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
1162e0c1b49fSNick Terrell     ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
1163e0c1b49fSNick Terrell 
1164e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
1165e0c1b49fSNick Terrell     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
1166e0c1b49fSNick Terrell         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
1167e0c1b49fSNick Terrell 
1168e0c1b49fSNick Terrell #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
1169e0c1b49fSNick Terrell     if (MEM_64bits()) \
1170e0c1b49fSNick Terrell         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
1171e0c1b49fSNick Terrell 
1172e0c1b49fSNick Terrell HINT_INLINE size_t
1173e0c1b49fSNick Terrell HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
1174e0c1b49fSNick Terrell                 const HUF_DEltX2* const dt, const U32 dtLog)
1175e0c1b49fSNick Terrell {
1176e0c1b49fSNick Terrell     BYTE* const pStart = p;
1177e0c1b49fSNick Terrell 
1178e0c1b49fSNick Terrell     /* up to 8 symbols at a time */
1179*2aa14b1aSNick Terrell     if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
1180*2aa14b1aSNick Terrell         if (dtLog <= 11 && MEM_64bits()) {
1181*2aa14b1aSNick Terrell             /* up to 10 symbols at a time */
1182*2aa14b1aSNick Terrell             while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
1183*2aa14b1aSNick Terrell                 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1184*2aa14b1aSNick Terrell                 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1185*2aa14b1aSNick Terrell                 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1186*2aa14b1aSNick Terrell                 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1187*2aa14b1aSNick Terrell                 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1188*2aa14b1aSNick Terrell             }
1189*2aa14b1aSNick Terrell         } else {
1190*2aa14b1aSNick Terrell             /* up to 8 symbols at a time */
1191e0c1b49fSNick Terrell             while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
1192e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
1193e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
1194e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
1195e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1196e0c1b49fSNick Terrell             }
1197*2aa14b1aSNick Terrell         }
1198*2aa14b1aSNick Terrell     } else {
1199*2aa14b1aSNick Terrell         BIT_reloadDStream(bitDPtr);
1200*2aa14b1aSNick Terrell     }
1201e0c1b49fSNick Terrell 
1202e0c1b49fSNick Terrell     /* closer to end : up to 2 symbols at a time */
1203*2aa14b1aSNick Terrell     if ((size_t)(pEnd - p) >= 2) {
1204e0c1b49fSNick Terrell         while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
1205e0c1b49fSNick Terrell             HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1206e0c1b49fSNick Terrell 
1207e0c1b49fSNick Terrell         while (p <= pEnd-2)
1208e0c1b49fSNick Terrell             HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
1209*2aa14b1aSNick Terrell     }
1210e0c1b49fSNick Terrell 
1211e0c1b49fSNick Terrell     if (p < pEnd)
1212e0c1b49fSNick Terrell         p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
1213e0c1b49fSNick Terrell 
1214e0c1b49fSNick Terrell     return p-pStart;
1215e0c1b49fSNick Terrell }
1216e0c1b49fSNick Terrell 
1217e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t
1218e0c1b49fSNick Terrell HUF_decompress1X2_usingDTable_internal_body(
1219e0c1b49fSNick Terrell           void* dst,  size_t dstSize,
1220e0c1b49fSNick Terrell     const void* cSrc, size_t cSrcSize,
1221e0c1b49fSNick Terrell     const HUF_DTable* DTable)
1222e0c1b49fSNick Terrell {
1223e0c1b49fSNick Terrell     BIT_DStream_t bitD;
1224e0c1b49fSNick Terrell 
1225e0c1b49fSNick Terrell     /* Init */
1226e0c1b49fSNick Terrell     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
1227e0c1b49fSNick Terrell 
1228e0c1b49fSNick Terrell     /* decode */
1229e0c1b49fSNick Terrell     {   BYTE* const ostart = (BYTE*) dst;
1230e0c1b49fSNick Terrell         BYTE* const oend = ostart + dstSize;
1231e0c1b49fSNick Terrell         const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
1232e0c1b49fSNick Terrell         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
1233e0c1b49fSNick Terrell         DTableDesc const dtd = HUF_getDTableDesc(DTable);
1234e0c1b49fSNick Terrell         HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
1235e0c1b49fSNick Terrell     }
1236e0c1b49fSNick Terrell 
1237e0c1b49fSNick Terrell     /* check */
1238e0c1b49fSNick Terrell     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
1239e0c1b49fSNick Terrell 
1240e0c1b49fSNick Terrell     /* decoded size */
1241e0c1b49fSNick Terrell     return dstSize;
1242e0c1b49fSNick Terrell }
1243e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t
1244e0c1b49fSNick Terrell HUF_decompress4X2_usingDTable_internal_body(
1245e0c1b49fSNick Terrell           void* dst,  size_t dstSize,
1246e0c1b49fSNick Terrell     const void* cSrc, size_t cSrcSize,
1247e0c1b49fSNick Terrell     const HUF_DTable* DTable)
1248e0c1b49fSNick Terrell {
1249e0c1b49fSNick Terrell     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
1250e0c1b49fSNick Terrell 
1251e0c1b49fSNick Terrell     {   const BYTE* const istart = (const BYTE*) cSrc;
1252e0c1b49fSNick Terrell         BYTE* const ostart = (BYTE*) dst;
1253e0c1b49fSNick Terrell         BYTE* const oend = ostart + dstSize;
1254e0c1b49fSNick Terrell         BYTE* const olimit = oend - (sizeof(size_t)-1);
1255e0c1b49fSNick Terrell         const void* const dtPtr = DTable+1;
1256e0c1b49fSNick Terrell         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
1257e0c1b49fSNick Terrell 
1258e0c1b49fSNick Terrell         /* Init */
1259e0c1b49fSNick Terrell         BIT_DStream_t bitD1;
1260e0c1b49fSNick Terrell         BIT_DStream_t bitD2;
1261e0c1b49fSNick Terrell         BIT_DStream_t bitD3;
1262e0c1b49fSNick Terrell         BIT_DStream_t bitD4;
1263e0c1b49fSNick Terrell         size_t const length1 = MEM_readLE16(istart);
1264e0c1b49fSNick Terrell         size_t const length2 = MEM_readLE16(istart+2);
1265e0c1b49fSNick Terrell         size_t const length3 = MEM_readLE16(istart+4);
1266e0c1b49fSNick Terrell         size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
1267e0c1b49fSNick Terrell         const BYTE* const istart1 = istart + 6;  /* jumpTable */
1268e0c1b49fSNick Terrell         const BYTE* const istart2 = istart1 + length1;
1269e0c1b49fSNick Terrell         const BYTE* const istart3 = istart2 + length2;
1270e0c1b49fSNick Terrell         const BYTE* const istart4 = istart3 + length3;
1271e0c1b49fSNick Terrell         size_t const segmentSize = (dstSize+3) / 4;
1272e0c1b49fSNick Terrell         BYTE* const opStart2 = ostart + segmentSize;
1273e0c1b49fSNick Terrell         BYTE* const opStart3 = opStart2 + segmentSize;
1274e0c1b49fSNick Terrell         BYTE* const opStart4 = opStart3 + segmentSize;
1275e0c1b49fSNick Terrell         BYTE* op1 = ostart;
1276e0c1b49fSNick Terrell         BYTE* op2 = opStart2;
1277e0c1b49fSNick Terrell         BYTE* op3 = opStart3;
1278e0c1b49fSNick Terrell         BYTE* op4 = opStart4;
1279e0c1b49fSNick Terrell         U32 endSignal = 1;
1280e0c1b49fSNick Terrell         DTableDesc const dtd = HUF_getDTableDesc(DTable);
1281e0c1b49fSNick Terrell         U32 const dtLog = dtd.tableLog;
1282e0c1b49fSNick Terrell 
1283e0c1b49fSNick Terrell         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
1284*2aa14b1aSNick Terrell         if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
1285e0c1b49fSNick Terrell         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
1286e0c1b49fSNick Terrell         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
1287e0c1b49fSNick Terrell         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
1288e0c1b49fSNick Terrell         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
1289e0c1b49fSNick Terrell 
1290e0c1b49fSNick Terrell         /* 16-32 symbols per loop (4-8 symbols per stream) */
1291*2aa14b1aSNick Terrell         if ((size_t)(oend - op4) >= sizeof(size_t)) {
1292e0c1b49fSNick Terrell             for ( ; (endSignal) & (op4 < olimit); ) {
1293e0c1b49fSNick Terrell #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
1294e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1295e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
1296e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1297e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
1298e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1299e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
1300e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1301e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
1302e0c1b49fSNick Terrell                 endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
1303e0c1b49fSNick Terrell                 endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
1304e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1305e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
1306e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1307e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
1308e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1309e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
1310e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1311e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
1312e0c1b49fSNick Terrell                 endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
1313e0c1b49fSNick Terrell                 endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
1314e0c1b49fSNick Terrell #else
1315e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1316e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1317e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1318e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1319e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
1320e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
1321e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
1322e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
1323e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1324e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1325e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1326e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1327e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
1328e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
1329e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
1330e0c1b49fSNick Terrell                 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
13310a8ea235SNathan Chancellor                 endSignal = (U32)LIKELY((U32)
1332e0c1b49fSNick Terrell                             (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
1333e0c1b49fSNick Terrell                         & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
1334e0c1b49fSNick Terrell                         & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
1335e0c1b49fSNick Terrell                         & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
1336e0c1b49fSNick Terrell #endif
1337e0c1b49fSNick Terrell             }
1338*2aa14b1aSNick Terrell         }
1339e0c1b49fSNick Terrell 
1340e0c1b49fSNick Terrell         /* check corruption */
1341e0c1b49fSNick Terrell         if (op1 > opStart2) return ERROR(corruption_detected);
1342e0c1b49fSNick Terrell         if (op2 > opStart3) return ERROR(corruption_detected);
1343e0c1b49fSNick Terrell         if (op3 > opStart4) return ERROR(corruption_detected);
1344e0c1b49fSNick Terrell         /* note : op4 already verified within main loop */
1345e0c1b49fSNick Terrell 
1346e0c1b49fSNick Terrell         /* finish bitStreams one by one */
1347e0c1b49fSNick Terrell         HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
1348e0c1b49fSNick Terrell         HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
1349e0c1b49fSNick Terrell         HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
1350e0c1b49fSNick Terrell         HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
1351e0c1b49fSNick Terrell 
1352e0c1b49fSNick Terrell         /* check */
1353e0c1b49fSNick Terrell         { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
1354e0c1b49fSNick Terrell           if (!endCheck) return ERROR(corruption_detected); }
1355e0c1b49fSNick Terrell 
1356e0c1b49fSNick Terrell         /* decoded size */
1357e0c1b49fSNick Terrell         return dstSize;
1358e0c1b49fSNick Terrell     }
1359e0c1b49fSNick Terrell }
1360e0c1b49fSNick Terrell 
1361*2aa14b1aSNick Terrell #if HUF_NEED_BMI2_FUNCTION
1362*2aa14b1aSNick Terrell static BMI2_TARGET_ATTRIBUTE
1363*2aa14b1aSNick Terrell size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
1364*2aa14b1aSNick Terrell                     size_t cSrcSize, HUF_DTable const* DTable) {
1365*2aa14b1aSNick Terrell     return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
1366*2aa14b1aSNick Terrell }
1367*2aa14b1aSNick Terrell #endif
1368*2aa14b1aSNick Terrell 
1369*2aa14b1aSNick Terrell #if HUF_NEED_DEFAULT_FUNCTION
1370*2aa14b1aSNick Terrell static
1371*2aa14b1aSNick Terrell size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
1372*2aa14b1aSNick Terrell                     size_t cSrcSize, HUF_DTable const* DTable) {
1373*2aa14b1aSNick Terrell     return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
1374*2aa14b1aSNick Terrell }
1375*2aa14b1aSNick Terrell #endif
1376*2aa14b1aSNick Terrell 
1377*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2
1378*2aa14b1aSNick Terrell 
1379*2aa14b1aSNick Terrell HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
1380*2aa14b1aSNick Terrell 
1381*2aa14b1aSNick Terrell static HUF_ASM_X86_64_BMI2_ATTRS size_t
1382*2aa14b1aSNick Terrell HUF_decompress4X2_usingDTable_internal_bmi2_asm(
1383*2aa14b1aSNick Terrell           void* dst,  size_t dstSize,
1384*2aa14b1aSNick Terrell     const void* cSrc, size_t cSrcSize,
1385*2aa14b1aSNick Terrell     const HUF_DTable* DTable) {
1386*2aa14b1aSNick Terrell     void const* dt = DTable + 1;
1387*2aa14b1aSNick Terrell     const BYTE* const iend = (const BYTE*)cSrc + 6;
1388*2aa14b1aSNick Terrell     BYTE* const oend = (BYTE*)dst + dstSize;
1389*2aa14b1aSNick Terrell     HUF_DecompressAsmArgs args;
1390*2aa14b1aSNick Terrell     {
1391*2aa14b1aSNick Terrell         size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
1392*2aa14b1aSNick Terrell         FORWARD_IF_ERROR(ret, "Failed to init asm args");
1393*2aa14b1aSNick Terrell         if (ret != 0)
1394*2aa14b1aSNick Terrell             return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
1395*2aa14b1aSNick Terrell     }
1396*2aa14b1aSNick Terrell 
1397*2aa14b1aSNick Terrell     assert(args.ip[0] >= args.ilimit);
1398*2aa14b1aSNick Terrell     HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
1399*2aa14b1aSNick Terrell 
1400*2aa14b1aSNick Terrell     /* note : op4 already verified within main loop */
1401*2aa14b1aSNick Terrell     assert(args.ip[0] >= iend);
1402*2aa14b1aSNick Terrell     assert(args.ip[1] >= iend);
1403*2aa14b1aSNick Terrell     assert(args.ip[2] >= iend);
1404*2aa14b1aSNick Terrell     assert(args.ip[3] >= iend);
1405*2aa14b1aSNick Terrell     assert(args.op[3] <= oend);
1406*2aa14b1aSNick Terrell     (void)iend;
1407*2aa14b1aSNick Terrell 
1408*2aa14b1aSNick Terrell     /* finish bitStreams one by one */
1409*2aa14b1aSNick Terrell     {
1410*2aa14b1aSNick Terrell         size_t const segmentSize = (dstSize+3) / 4;
1411*2aa14b1aSNick Terrell         BYTE* segmentEnd = (BYTE*)dst;
1412*2aa14b1aSNick Terrell         int i;
1413*2aa14b1aSNick Terrell         for (i = 0; i < 4; ++i) {
1414*2aa14b1aSNick Terrell             BIT_DStream_t bit;
1415*2aa14b1aSNick Terrell             if (segmentSize <= (size_t)(oend - segmentEnd))
1416*2aa14b1aSNick Terrell                 segmentEnd += segmentSize;
1417*2aa14b1aSNick Terrell             else
1418*2aa14b1aSNick Terrell                 segmentEnd = oend;
1419*2aa14b1aSNick Terrell             FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
1420*2aa14b1aSNick Terrell             args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
1421*2aa14b1aSNick Terrell             if (args.op[i] != segmentEnd)
1422*2aa14b1aSNick Terrell                 return ERROR(corruption_detected);
1423*2aa14b1aSNick Terrell         }
1424*2aa14b1aSNick Terrell     }
1425*2aa14b1aSNick Terrell 
1426*2aa14b1aSNick Terrell     /* decoded size */
1427*2aa14b1aSNick Terrell     return dstSize;
1428*2aa14b1aSNick Terrell }
1429*2aa14b1aSNick Terrell #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
1430*2aa14b1aSNick Terrell 
1431*2aa14b1aSNick Terrell static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
1432*2aa14b1aSNick Terrell                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
1433*2aa14b1aSNick Terrell {
1434*2aa14b1aSNick Terrell #if DYNAMIC_BMI2
1435*2aa14b1aSNick Terrell     if (bmi2) {
1436*2aa14b1aSNick Terrell # if ZSTD_ENABLE_ASM_X86_64_BMI2
1437*2aa14b1aSNick Terrell         return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
1438*2aa14b1aSNick Terrell # else
1439*2aa14b1aSNick Terrell         return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
1440*2aa14b1aSNick Terrell # endif
1441*2aa14b1aSNick Terrell     }
1442*2aa14b1aSNick Terrell #else
1443*2aa14b1aSNick Terrell     (void)bmi2;
1444*2aa14b1aSNick Terrell #endif
1445*2aa14b1aSNick Terrell 
1446*2aa14b1aSNick Terrell #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
1447*2aa14b1aSNick Terrell     return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
1448*2aa14b1aSNick Terrell #else
1449*2aa14b1aSNick Terrell     return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
1450*2aa14b1aSNick Terrell #endif
1451*2aa14b1aSNick Terrell }
1452*2aa14b1aSNick Terrell 
1453e0c1b49fSNick Terrell HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
1454e0c1b49fSNick Terrell 
1455e0c1b49fSNick Terrell size_t HUF_decompress1X2_usingDTable(
1456e0c1b49fSNick Terrell           void* dst,  size_t dstSize,
1457e0c1b49fSNick Terrell     const void* cSrc, size_t cSrcSize,
1458e0c1b49fSNick Terrell     const HUF_DTable* DTable)
1459e0c1b49fSNick Terrell {
1460e0c1b49fSNick Terrell     DTableDesc dtd = HUF_getDTableDesc(DTable);
1461e0c1b49fSNick Terrell     if (dtd.tableType != 1) return ERROR(GENERIC);
1462e0c1b49fSNick Terrell     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1463e0c1b49fSNick Terrell }
1464e0c1b49fSNick Terrell 
1465e0c1b49fSNick Terrell size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
1466e0c1b49fSNick Terrell                                    const void* cSrc, size_t cSrcSize,
1467e0c1b49fSNick Terrell                                    void* workSpace, size_t wkspSize)
1468e0c1b49fSNick Terrell {
1469e0c1b49fSNick Terrell     const BYTE* ip = (const BYTE*) cSrc;
1470e0c1b49fSNick Terrell 
1471e0c1b49fSNick Terrell     size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
1472e0c1b49fSNick Terrell                                                workSpace, wkspSize);
1473e0c1b49fSNick Terrell     if (HUF_isError(hSize)) return hSize;
1474e0c1b49fSNick Terrell     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1475e0c1b49fSNick Terrell     ip += hSize; cSrcSize -= hSize;
1476e0c1b49fSNick Terrell 
1477e0c1b49fSNick Terrell     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
1478e0c1b49fSNick Terrell }
1479e0c1b49fSNick Terrell 
1480e0c1b49fSNick Terrell 
1481e0c1b49fSNick Terrell size_t HUF_decompress4X2_usingDTable(
1482e0c1b49fSNick Terrell           void* dst,  size_t dstSize,
1483e0c1b49fSNick Terrell     const void* cSrc, size_t cSrcSize,
1484e0c1b49fSNick Terrell     const HUF_DTable* DTable)
1485e0c1b49fSNick Terrell {
1486e0c1b49fSNick Terrell     DTableDesc dtd = HUF_getDTableDesc(DTable);
1487e0c1b49fSNick Terrell     if (dtd.tableType != 1) return ERROR(GENERIC);
1488e0c1b49fSNick Terrell     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1489e0c1b49fSNick Terrell }
1490e0c1b49fSNick Terrell 
1491e0c1b49fSNick Terrell static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
1492e0c1b49fSNick Terrell                                    const void* cSrc, size_t cSrcSize,
1493e0c1b49fSNick Terrell                                    void* workSpace, size_t wkspSize, int bmi2)
1494e0c1b49fSNick Terrell {
1495e0c1b49fSNick Terrell     const BYTE* ip = (const BYTE*) cSrc;
1496e0c1b49fSNick Terrell 
1497e0c1b49fSNick Terrell     size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
1498e0c1b49fSNick Terrell                                          workSpace, wkspSize);
1499e0c1b49fSNick Terrell     if (HUF_isError(hSize)) return hSize;
1500e0c1b49fSNick Terrell     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1501e0c1b49fSNick Terrell     ip += hSize; cSrcSize -= hSize;
1502e0c1b49fSNick Terrell 
1503e0c1b49fSNick Terrell     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1504e0c1b49fSNick Terrell }
1505e0c1b49fSNick Terrell 
1506e0c1b49fSNick Terrell size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1507e0c1b49fSNick Terrell                                    const void* cSrc, size_t cSrcSize,
1508e0c1b49fSNick Terrell                                    void* workSpace, size_t wkspSize)
1509e0c1b49fSNick Terrell {
1510e0c1b49fSNick Terrell     return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
1511e0c1b49fSNick Terrell }
1512e0c1b49fSNick Terrell 
1513e0c1b49fSNick Terrell 
1514e0c1b49fSNick Terrell #endif /* HUF_FORCE_DECOMPRESS_X1 */
1515e0c1b49fSNick Terrell 
1516e0c1b49fSNick Terrell 
1517e0c1b49fSNick Terrell /* ***********************************/
1518e0c1b49fSNick Terrell /* Universal decompression selectors */
1519e0c1b49fSNick Terrell /* ***********************************/
1520e0c1b49fSNick Terrell 
1521e0c1b49fSNick Terrell size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
1522e0c1b49fSNick Terrell                                     const void* cSrc, size_t cSrcSize,
1523e0c1b49fSNick Terrell                                     const HUF_DTable* DTable)
1524e0c1b49fSNick Terrell {
1525e0c1b49fSNick Terrell     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1526e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1527e0c1b49fSNick Terrell     (void)dtd;
1528e0c1b49fSNick Terrell     assert(dtd.tableType == 0);
1529e0c1b49fSNick Terrell     return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1530e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1531e0c1b49fSNick Terrell     (void)dtd;
1532e0c1b49fSNick Terrell     assert(dtd.tableType == 1);
1533e0c1b49fSNick Terrell     return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1534e0c1b49fSNick Terrell #else
1535e0c1b49fSNick Terrell     return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1536e0c1b49fSNick Terrell                            HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1537e0c1b49fSNick Terrell #endif
1538e0c1b49fSNick Terrell }
1539e0c1b49fSNick Terrell 
1540e0c1b49fSNick Terrell size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
1541e0c1b49fSNick Terrell                                     const void* cSrc, size_t cSrcSize,
1542e0c1b49fSNick Terrell                                     const HUF_DTable* DTable)
1543e0c1b49fSNick Terrell {
1544e0c1b49fSNick Terrell     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1545e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1546e0c1b49fSNick Terrell     (void)dtd;
1547e0c1b49fSNick Terrell     assert(dtd.tableType == 0);
1548e0c1b49fSNick Terrell     return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1549e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1550e0c1b49fSNick Terrell     (void)dtd;
1551e0c1b49fSNick Terrell     assert(dtd.tableType == 1);
1552e0c1b49fSNick Terrell     return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1553e0c1b49fSNick Terrell #else
1554e0c1b49fSNick Terrell     return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1555e0c1b49fSNick Terrell                            HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1556e0c1b49fSNick Terrell #endif
1557e0c1b49fSNick Terrell }
1558e0c1b49fSNick Terrell 
1559e0c1b49fSNick Terrell 
1560e0c1b49fSNick Terrell #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1561e0c1b49fSNick Terrell typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
1562*2aa14b1aSNick Terrell static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
1563e0c1b49fSNick Terrell {
1564e0c1b49fSNick Terrell     /* single, double, quad */
1565*2aa14b1aSNick Terrell     {{0,0}, {1,1}},  /* Q==0 : impossible */
1566*2aa14b1aSNick Terrell     {{0,0}, {1,1}},  /* Q==1 : impossible */
1567*2aa14b1aSNick Terrell     {{ 150,216}, { 381,119}},   /* Q == 2 : 12-18% */
1568*2aa14b1aSNick Terrell     {{ 170,205}, { 514,112}},   /* Q == 3 : 18-25% */
1569*2aa14b1aSNick Terrell     {{ 177,199}, { 539,110}},   /* Q == 4 : 25-32% */
1570*2aa14b1aSNick Terrell     {{ 197,194}, { 644,107}},   /* Q == 5 : 32-38% */
1571*2aa14b1aSNick Terrell     {{ 221,192}, { 735,107}},   /* Q == 6 : 38-44% */
1572*2aa14b1aSNick Terrell     {{ 256,189}, { 881,106}},   /* Q == 7 : 44-50% */
1573*2aa14b1aSNick Terrell     {{ 359,188}, {1167,109}},   /* Q == 8 : 50-56% */
1574*2aa14b1aSNick Terrell     {{ 582,187}, {1570,114}},   /* Q == 9 : 56-62% */
1575*2aa14b1aSNick Terrell     {{ 688,187}, {1712,122}},   /* Q ==10 : 62-69% */
1576*2aa14b1aSNick Terrell     {{ 825,186}, {1965,136}},   /* Q ==11 : 69-75% */
1577*2aa14b1aSNick Terrell     {{ 976,185}, {2131,150}},   /* Q ==12 : 75-81% */
1578*2aa14b1aSNick Terrell     {{1180,186}, {2070,175}},   /* Q ==13 : 81-87% */
1579*2aa14b1aSNick Terrell     {{1377,185}, {1731,202}},   /* Q ==14 : 87-93% */
1580*2aa14b1aSNick Terrell     {{1412,185}, {1695,202}},   /* Q ==15 : 93-99% */
1581e0c1b49fSNick Terrell };
1582e0c1b49fSNick Terrell #endif
1583e0c1b49fSNick Terrell 
1584e0c1b49fSNick Terrell /* HUF_selectDecoder() :
1585e0c1b49fSNick Terrell  *  Tells which decoder is likely to decode faster,
1586e0c1b49fSNick Terrell  *  based on a set of pre-computed metrics.
1587e0c1b49fSNick Terrell  * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1588e0c1b49fSNick Terrell  *  Assumption : 0 < dstSize <= 128 KB */
1589e0c1b49fSNick Terrell U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1590e0c1b49fSNick Terrell {
1591e0c1b49fSNick Terrell     assert(dstSize > 0);
1592e0c1b49fSNick Terrell     assert(dstSize <= 128*1024);
1593e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1594e0c1b49fSNick Terrell     (void)dstSize;
1595e0c1b49fSNick Terrell     (void)cSrcSize;
1596e0c1b49fSNick Terrell     return 0;
1597e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1598e0c1b49fSNick Terrell     (void)dstSize;
1599e0c1b49fSNick Terrell     (void)cSrcSize;
1600e0c1b49fSNick Terrell     return 1;
1601e0c1b49fSNick Terrell #else
1602e0c1b49fSNick Terrell     /* decoder timing evaluation */
1603e0c1b49fSNick Terrell     {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
1604e0c1b49fSNick Terrell         U32 const D256 = (U32)(dstSize >> 8);
1605e0c1b49fSNick Terrell         U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1606e0c1b49fSNick Terrell         U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1607*2aa14b1aSNick Terrell         DTime1 += DTime1 >> 5;  /* small advantage to algorithm using less memory, to reduce cache eviction */
1608e0c1b49fSNick Terrell         return DTime1 < DTime0;
1609e0c1b49fSNick Terrell     }
1610e0c1b49fSNick Terrell #endif
1611e0c1b49fSNick Terrell }
1612e0c1b49fSNick Terrell 
1613e0c1b49fSNick Terrell 
1614e0c1b49fSNick Terrell size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1615e0c1b49fSNick Terrell                                      size_t dstSize, const void* cSrc,
1616e0c1b49fSNick Terrell                                      size_t cSrcSize, void* workSpace,
1617e0c1b49fSNick Terrell                                      size_t wkspSize)
1618e0c1b49fSNick Terrell {
1619e0c1b49fSNick Terrell     /* validation checks */
1620e0c1b49fSNick Terrell     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1621e0c1b49fSNick Terrell     if (cSrcSize == 0) return ERROR(corruption_detected);
1622e0c1b49fSNick Terrell 
1623e0c1b49fSNick Terrell     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1624e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1625e0c1b49fSNick Terrell         (void)algoNb;
1626e0c1b49fSNick Terrell         assert(algoNb == 0);
1627e0c1b49fSNick Terrell         return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1628e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1629e0c1b49fSNick Terrell         (void)algoNb;
1630e0c1b49fSNick Terrell         assert(algoNb == 1);
1631e0c1b49fSNick Terrell         return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1632e0c1b49fSNick Terrell #else
1633e0c1b49fSNick Terrell         return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1634e0c1b49fSNick Terrell                             cSrcSize, workSpace, wkspSize):
1635e0c1b49fSNick Terrell                         HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1636e0c1b49fSNick Terrell #endif
1637e0c1b49fSNick Terrell     }
1638e0c1b49fSNick Terrell }
1639e0c1b49fSNick Terrell 
1640e0c1b49fSNick Terrell size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1641e0c1b49fSNick Terrell                                   const void* cSrc, size_t cSrcSize,
1642e0c1b49fSNick Terrell                                   void* workSpace, size_t wkspSize)
1643e0c1b49fSNick Terrell {
1644e0c1b49fSNick Terrell     /* validation checks */
1645e0c1b49fSNick Terrell     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1646e0c1b49fSNick Terrell     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1647e0c1b49fSNick Terrell     if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1648e0c1b49fSNick Terrell     if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1649e0c1b49fSNick Terrell 
1650e0c1b49fSNick Terrell     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1651e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1652e0c1b49fSNick Terrell         (void)algoNb;
1653e0c1b49fSNick Terrell         assert(algoNb == 0);
1654e0c1b49fSNick Terrell         return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1655e0c1b49fSNick Terrell                                 cSrcSize, workSpace, wkspSize);
1656e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1657e0c1b49fSNick Terrell         (void)algoNb;
1658e0c1b49fSNick Terrell         assert(algoNb == 1);
1659e0c1b49fSNick Terrell         return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1660e0c1b49fSNick Terrell                                 cSrcSize, workSpace, wkspSize);
1661e0c1b49fSNick Terrell #else
1662e0c1b49fSNick Terrell         return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1663e0c1b49fSNick Terrell                                 cSrcSize, workSpace, wkspSize):
1664e0c1b49fSNick Terrell                         HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1665e0c1b49fSNick Terrell                                 cSrcSize, workSpace, wkspSize);
1666e0c1b49fSNick Terrell #endif
1667e0c1b49fSNick Terrell     }
1668e0c1b49fSNick Terrell }
1669e0c1b49fSNick Terrell 
1670e0c1b49fSNick Terrell 
1671e0c1b49fSNick Terrell size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1672e0c1b49fSNick Terrell {
1673e0c1b49fSNick Terrell     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1674e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1675e0c1b49fSNick Terrell     (void)dtd;
1676e0c1b49fSNick Terrell     assert(dtd.tableType == 0);
1677e0c1b49fSNick Terrell     return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1678e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1679e0c1b49fSNick Terrell     (void)dtd;
1680e0c1b49fSNick Terrell     assert(dtd.tableType == 1);
1681e0c1b49fSNick Terrell     return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1682e0c1b49fSNick Terrell #else
1683e0c1b49fSNick Terrell     return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1684e0c1b49fSNick Terrell                            HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1685e0c1b49fSNick Terrell #endif
1686e0c1b49fSNick Terrell }
1687e0c1b49fSNick Terrell 
1688e0c1b49fSNick Terrell #ifndef HUF_FORCE_DECOMPRESS_X2
1689e0c1b49fSNick Terrell size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1690e0c1b49fSNick Terrell {
1691e0c1b49fSNick Terrell     const BYTE* ip = (const BYTE*) cSrc;
1692e0c1b49fSNick Terrell 
1693e0c1b49fSNick Terrell     size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1694e0c1b49fSNick Terrell     if (HUF_isError(hSize)) return hSize;
1695e0c1b49fSNick Terrell     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1696e0c1b49fSNick Terrell     ip += hSize; cSrcSize -= hSize;
1697e0c1b49fSNick Terrell 
1698e0c1b49fSNick Terrell     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1699e0c1b49fSNick Terrell }
1700e0c1b49fSNick Terrell #endif
1701e0c1b49fSNick Terrell 
1702e0c1b49fSNick Terrell size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1703e0c1b49fSNick Terrell {
1704e0c1b49fSNick Terrell     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1705e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1706e0c1b49fSNick Terrell     (void)dtd;
1707e0c1b49fSNick Terrell     assert(dtd.tableType == 0);
1708e0c1b49fSNick Terrell     return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1709e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1710e0c1b49fSNick Terrell     (void)dtd;
1711e0c1b49fSNick Terrell     assert(dtd.tableType == 1);
1712e0c1b49fSNick Terrell     return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1713e0c1b49fSNick Terrell #else
1714e0c1b49fSNick Terrell     return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1715e0c1b49fSNick Terrell                            HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1716e0c1b49fSNick Terrell #endif
1717e0c1b49fSNick Terrell }
1718e0c1b49fSNick Terrell 
1719e0c1b49fSNick Terrell size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1720e0c1b49fSNick Terrell {
1721e0c1b49fSNick Terrell     /* validation checks */
1722e0c1b49fSNick Terrell     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1723e0c1b49fSNick Terrell     if (cSrcSize == 0) return ERROR(corruption_detected);
1724e0c1b49fSNick Terrell 
1725e0c1b49fSNick Terrell     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1726e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X1)
1727e0c1b49fSNick Terrell         (void)algoNb;
1728e0c1b49fSNick Terrell         assert(algoNb == 0);
1729e0c1b49fSNick Terrell         return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1730e0c1b49fSNick Terrell #elif defined(HUF_FORCE_DECOMPRESS_X2)
1731e0c1b49fSNick Terrell         (void)algoNb;
1732e0c1b49fSNick Terrell         assert(algoNb == 1);
1733e0c1b49fSNick Terrell         return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1734e0c1b49fSNick Terrell #else
1735e0c1b49fSNick Terrell         return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1736e0c1b49fSNick Terrell                         HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1737e0c1b49fSNick Terrell #endif
1738e0c1b49fSNick Terrell     }
1739e0c1b49fSNick Terrell }
1740e0c1b49fSNick Terrell 
1741