1e0c1b49fSNick Terrell /*
2e0c1b49fSNick Terrell * Copyright (c) Yann Collet, Facebook, Inc.
3e0c1b49fSNick Terrell * All rights reserved.
4e0c1b49fSNick Terrell *
5e0c1b49fSNick Terrell * This source code is licensed under both the BSD-style license (found in the
6e0c1b49fSNick Terrell * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7e0c1b49fSNick Terrell * in the COPYING file in the root directory of this source tree).
8e0c1b49fSNick Terrell * You may select, at your option, one of the above-listed licenses.
9e0c1b49fSNick Terrell */
10e0c1b49fSNick Terrell
11e0c1b49fSNick Terrell
12e0c1b49fSNick Terrell /* zstd_decompress_internal:
13e0c1b49fSNick Terrell * objects and definitions shared within lib/decompress modules */
14e0c1b49fSNick Terrell
15e0c1b49fSNick Terrell #ifndef ZSTD_DECOMPRESS_INTERNAL_H
16e0c1b49fSNick Terrell #define ZSTD_DECOMPRESS_INTERNAL_H
17e0c1b49fSNick Terrell
18e0c1b49fSNick Terrell
19e0c1b49fSNick Terrell /*-*******************************************************
20e0c1b49fSNick Terrell * Dependencies
21e0c1b49fSNick Terrell *********************************************************/
22e0c1b49fSNick Terrell #include "../common/mem.h" /* BYTE, U16, U32 */
23*2aa14b1aSNick Terrell #include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
24e0c1b49fSNick Terrell
25e0c1b49fSNick Terrell
26e0c1b49fSNick Terrell
27e0c1b49fSNick Terrell /*-*******************************************************
28e0c1b49fSNick Terrell * Constants
29e0c1b49fSNick Terrell *********************************************************/
30e0c1b49fSNick Terrell static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
31e0c1b49fSNick Terrell 0, 1, 2, 3, 4, 5, 6, 7,
32e0c1b49fSNick Terrell 8, 9, 10, 11, 12, 13, 14, 15,
33e0c1b49fSNick Terrell 16, 18, 20, 22, 24, 28, 32, 40,
34e0c1b49fSNick Terrell 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
35e0c1b49fSNick Terrell 0x2000, 0x4000, 0x8000, 0x10000 };
36e0c1b49fSNick Terrell
37e0c1b49fSNick Terrell static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
38e0c1b49fSNick Terrell 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
39e0c1b49fSNick Terrell 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
40e0c1b49fSNick Terrell 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41e0c1b49fSNick Terrell 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42e0c1b49fSNick Terrell
43*2aa14b1aSNick Terrell static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
44e0c1b49fSNick Terrell 0, 1, 2, 3, 4, 5, 6, 7,
45e0c1b49fSNick Terrell 8, 9, 10, 11, 12, 13, 14, 15,
46e0c1b49fSNick Terrell 16, 17, 18, 19, 20, 21, 22, 23,
47e0c1b49fSNick Terrell 24, 25, 26, 27, 28, 29, 30, 31 };
48e0c1b49fSNick Terrell
49e0c1b49fSNick Terrell static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
50e0c1b49fSNick Terrell 3, 4, 5, 6, 7, 8, 9, 10,
51e0c1b49fSNick Terrell 11, 12, 13, 14, 15, 16, 17, 18,
52e0c1b49fSNick Terrell 19, 20, 21, 22, 23, 24, 25, 26,
53e0c1b49fSNick Terrell 27, 28, 29, 30, 31, 32, 33, 34,
54e0c1b49fSNick Terrell 35, 37, 39, 41, 43, 47, 51, 59,
55e0c1b49fSNick Terrell 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
56e0c1b49fSNick Terrell 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
57e0c1b49fSNick Terrell
58e0c1b49fSNick Terrell
59e0c1b49fSNick Terrell /*-*******************************************************
60e0c1b49fSNick Terrell * Decompression types
61e0c1b49fSNick Terrell *********************************************************/
62e0c1b49fSNick Terrell typedef struct {
63e0c1b49fSNick Terrell U32 fastMode;
64e0c1b49fSNick Terrell U32 tableLog;
65e0c1b49fSNick Terrell } ZSTD_seqSymbol_header;
66e0c1b49fSNick Terrell
67e0c1b49fSNick Terrell typedef struct {
68e0c1b49fSNick Terrell U16 nextState;
69e0c1b49fSNick Terrell BYTE nbAdditionalBits;
70e0c1b49fSNick Terrell BYTE nbBits;
71e0c1b49fSNick Terrell U32 baseValue;
72e0c1b49fSNick Terrell } ZSTD_seqSymbol;
73e0c1b49fSNick Terrell
74e0c1b49fSNick Terrell #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
75e0c1b49fSNick Terrell
76e0c1b49fSNick Terrell #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
77e0c1b49fSNick Terrell #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
78e0c1b49fSNick Terrell
79e0c1b49fSNick Terrell typedef struct {
80e0c1b49fSNick Terrell ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
81e0c1b49fSNick Terrell ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
82e0c1b49fSNick Terrell ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
83e0c1b49fSNick Terrell HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
84e0c1b49fSNick Terrell U32 rep[ZSTD_REP_NUM];
85e0c1b49fSNick Terrell U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
86e0c1b49fSNick Terrell } ZSTD_entropyDTables_t;
87e0c1b49fSNick Terrell
88e0c1b49fSNick Terrell typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
89e0c1b49fSNick Terrell ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
90e0c1b49fSNick Terrell ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
91e0c1b49fSNick Terrell ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
92e0c1b49fSNick Terrell
93e0c1b49fSNick Terrell typedef enum { zdss_init=0, zdss_loadHeader,
94e0c1b49fSNick Terrell zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
95e0c1b49fSNick Terrell
96e0c1b49fSNick Terrell typedef enum {
97e0c1b49fSNick Terrell ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */
98e0c1b49fSNick Terrell ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */
99e0c1b49fSNick Terrell ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
100e0c1b49fSNick Terrell } ZSTD_dictUses_e;
101e0c1b49fSNick Terrell
102e0c1b49fSNick Terrell /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
103e0c1b49fSNick Terrell typedef struct {
104e0c1b49fSNick Terrell const ZSTD_DDict** ddictPtrTable;
105e0c1b49fSNick Terrell size_t ddictPtrTableSize;
106e0c1b49fSNick Terrell size_t ddictPtrCount;
107e0c1b49fSNick Terrell } ZSTD_DDictHashSet;
108e0c1b49fSNick Terrell
109*2aa14b1aSNick Terrell #ifndef ZSTD_DECODER_INTERNAL_BUFFER
110*2aa14b1aSNick Terrell # define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
111*2aa14b1aSNick Terrell #endif
112*2aa14b1aSNick Terrell
113*2aa14b1aSNick Terrell #define ZSTD_LBMIN 64
114*2aa14b1aSNick Terrell #define ZSTD_LBMAX (128 << 10)
115*2aa14b1aSNick Terrell
116*2aa14b1aSNick Terrell /* extra buffer, compensates when dst is not large enough to store litBuffer */
117*2aa14b1aSNick Terrell #define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
118*2aa14b1aSNick Terrell
119*2aa14b1aSNick Terrell typedef enum {
120*2aa14b1aSNick Terrell ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
121*2aa14b1aSNick Terrell ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
122*2aa14b1aSNick Terrell ZSTD_split = 2 /* Split between litExtraBuffer and dst */
123*2aa14b1aSNick Terrell } ZSTD_litLocation_e;
124*2aa14b1aSNick Terrell
125e0c1b49fSNick Terrell struct ZSTD_DCtx_s
126e0c1b49fSNick Terrell {
127e0c1b49fSNick Terrell const ZSTD_seqSymbol* LLTptr;
128e0c1b49fSNick Terrell const ZSTD_seqSymbol* MLTptr;
129e0c1b49fSNick Terrell const ZSTD_seqSymbol* OFTptr;
130e0c1b49fSNick Terrell const HUF_DTable* HUFptr;
131e0c1b49fSNick Terrell ZSTD_entropyDTables_t entropy;
132e0c1b49fSNick Terrell U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
133e0c1b49fSNick Terrell const void* previousDstEnd; /* detect continuity */
134e0c1b49fSNick Terrell const void* prefixStart; /* start of current segment */
135e0c1b49fSNick Terrell const void* virtualStart; /* virtual start of previous segment if it was just before current one */
136e0c1b49fSNick Terrell const void* dictEnd; /* end of previous segment */
137e0c1b49fSNick Terrell size_t expected;
138e0c1b49fSNick Terrell ZSTD_frameHeader fParams;
139e0c1b49fSNick Terrell U64 processedCSize;
140e0c1b49fSNick Terrell U64 decodedSize;
141e0c1b49fSNick Terrell blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
142e0c1b49fSNick Terrell ZSTD_dStage stage;
143e0c1b49fSNick Terrell U32 litEntropy;
144e0c1b49fSNick Terrell U32 fseEntropy;
145e0c1b49fSNick Terrell struct xxh64_state xxhState;
146e0c1b49fSNick Terrell size_t headerSize;
147e0c1b49fSNick Terrell ZSTD_format_e format;
148e0c1b49fSNick Terrell ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
149e0c1b49fSNick Terrell U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
150e0c1b49fSNick Terrell const BYTE* litPtr;
151e0c1b49fSNick Terrell ZSTD_customMem customMem;
152e0c1b49fSNick Terrell size_t litSize;
153e0c1b49fSNick Terrell size_t rleSize;
154e0c1b49fSNick Terrell size_t staticSize;
155*2aa14b1aSNick Terrell #if DYNAMIC_BMI2 != 0
156e0c1b49fSNick Terrell int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
157*2aa14b1aSNick Terrell #endif
158e0c1b49fSNick Terrell
159e0c1b49fSNick Terrell /* dictionary */
160e0c1b49fSNick Terrell ZSTD_DDict* ddictLocal;
161e0c1b49fSNick Terrell const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
162e0c1b49fSNick Terrell U32 dictID;
163e0c1b49fSNick Terrell int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
164e0c1b49fSNick Terrell ZSTD_dictUses_e dictUses;
165e0c1b49fSNick Terrell ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
166e0c1b49fSNick Terrell ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
167e0c1b49fSNick Terrell
168e0c1b49fSNick Terrell /* streaming */
169e0c1b49fSNick Terrell ZSTD_dStreamStage streamStage;
170e0c1b49fSNick Terrell char* inBuff;
171e0c1b49fSNick Terrell size_t inBuffSize;
172e0c1b49fSNick Terrell size_t inPos;
173e0c1b49fSNick Terrell size_t maxWindowSize;
174e0c1b49fSNick Terrell char* outBuff;
175e0c1b49fSNick Terrell size_t outBuffSize;
176e0c1b49fSNick Terrell size_t outStart;
177e0c1b49fSNick Terrell size_t outEnd;
178e0c1b49fSNick Terrell size_t lhSize;
179e0c1b49fSNick Terrell U32 hostageByte;
180e0c1b49fSNick Terrell int noForwardProgress;
181e0c1b49fSNick Terrell ZSTD_bufferMode_e outBufferMode;
182e0c1b49fSNick Terrell ZSTD_outBuffer expectedOutBuffer;
183e0c1b49fSNick Terrell
184e0c1b49fSNick Terrell /* workspace */
185*2aa14b1aSNick Terrell BYTE* litBuffer;
186*2aa14b1aSNick Terrell const BYTE* litBufferEnd;
187*2aa14b1aSNick Terrell ZSTD_litLocation_e litBufferLocation;
188*2aa14b1aSNick Terrell BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
189e0c1b49fSNick Terrell BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
190e0c1b49fSNick Terrell
191e0c1b49fSNick Terrell size_t oversizedDuration;
192e0c1b49fSNick Terrell
193e0c1b49fSNick Terrell #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
194e0c1b49fSNick Terrell void const* dictContentBeginForFuzzing;
195e0c1b49fSNick Terrell void const* dictContentEndForFuzzing;
196e0c1b49fSNick Terrell #endif
197e0c1b49fSNick Terrell
198e0c1b49fSNick Terrell /* Tracing */
199e0c1b49fSNick Terrell }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
200e0c1b49fSNick Terrell
ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s * dctx)201*2aa14b1aSNick Terrell MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
202*2aa14b1aSNick Terrell #if DYNAMIC_BMI2 != 0
203*2aa14b1aSNick Terrell return dctx->bmi2;
204*2aa14b1aSNick Terrell #else
205*2aa14b1aSNick Terrell (void)dctx;
206*2aa14b1aSNick Terrell return 0;
207*2aa14b1aSNick Terrell #endif
208*2aa14b1aSNick Terrell }
209e0c1b49fSNick Terrell
210e0c1b49fSNick Terrell /*-*******************************************************
211e0c1b49fSNick Terrell * Shared internal functions
212e0c1b49fSNick Terrell *********************************************************/
213e0c1b49fSNick Terrell
214e0c1b49fSNick Terrell /*! ZSTD_loadDEntropy() :
215e0c1b49fSNick Terrell * dict : must point at beginning of a valid zstd dictionary.
216e0c1b49fSNick Terrell * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
217e0c1b49fSNick Terrell size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
218e0c1b49fSNick Terrell const void* const dict, size_t const dictSize);
219e0c1b49fSNick Terrell
220e0c1b49fSNick Terrell /*! ZSTD_checkContinuity() :
221e0c1b49fSNick Terrell * check if next `dst` follows previous position, where decompression ended.
222e0c1b49fSNick Terrell * If yes, do nothing (continue on current segment).
223e0c1b49fSNick Terrell * If not, classify previous segment as "external dictionary", and start a new segment.
224e0c1b49fSNick Terrell * This function cannot fail. */
225e0c1b49fSNick Terrell void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
226e0c1b49fSNick Terrell
227e0c1b49fSNick Terrell
228e0c1b49fSNick Terrell #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
229