xref: /openbmc/linux/lib/zstd/decompress/zstd_decompress_internal.h (revision 7ae9fb1b7ecbb5d85d07857943f677fd1a559b18)
1e0c1b49fSNick Terrell /*
2e0c1b49fSNick Terrell  * Copyright (c) Yann Collet, Facebook, Inc.
3e0c1b49fSNick Terrell  * All rights reserved.
4e0c1b49fSNick Terrell  *
5e0c1b49fSNick Terrell  * This source code is licensed under both the BSD-style license (found in the
6e0c1b49fSNick Terrell  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7e0c1b49fSNick Terrell  * in the COPYING file in the root directory of this source tree).
8e0c1b49fSNick Terrell  * You may select, at your option, one of the above-listed licenses.
9e0c1b49fSNick Terrell  */
10e0c1b49fSNick Terrell 
11e0c1b49fSNick Terrell 
12e0c1b49fSNick Terrell /* zstd_decompress_internal:
13e0c1b49fSNick Terrell  * objects and definitions shared within lib/decompress modules */
14e0c1b49fSNick Terrell 
15e0c1b49fSNick Terrell  #ifndef ZSTD_DECOMPRESS_INTERNAL_H
16e0c1b49fSNick Terrell  #define ZSTD_DECOMPRESS_INTERNAL_H
17e0c1b49fSNick Terrell 
18e0c1b49fSNick Terrell 
19e0c1b49fSNick Terrell /*-*******************************************************
20e0c1b49fSNick Terrell  *  Dependencies
21e0c1b49fSNick Terrell  *********************************************************/
22e0c1b49fSNick Terrell #include "../common/mem.h"             /* BYTE, U16, U32 */
23*2aa14b1aSNick Terrell #include "../common/zstd_internal.h"   /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
24e0c1b49fSNick Terrell 
25e0c1b49fSNick Terrell 
26e0c1b49fSNick Terrell 
27e0c1b49fSNick Terrell /*-*******************************************************
28e0c1b49fSNick Terrell  *  Constants
29e0c1b49fSNick Terrell  *********************************************************/
30e0c1b49fSNick Terrell static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
31e0c1b49fSNick Terrell                  0,    1,    2,     3,     4,     5,     6,      7,
32e0c1b49fSNick Terrell                  8,    9,   10,    11,    12,    13,    14,     15,
33e0c1b49fSNick Terrell                 16,   18,   20,    22,    24,    28,    32,     40,
34e0c1b49fSNick Terrell                 48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
35e0c1b49fSNick Terrell                 0x2000, 0x4000, 0x8000, 0x10000 };
36e0c1b49fSNick Terrell 
37e0c1b49fSNick Terrell static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
38e0c1b49fSNick Terrell                  0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
39e0c1b49fSNick Terrell                  0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
40e0c1b49fSNick Terrell                  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41e0c1b49fSNick Terrell                  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42e0c1b49fSNick Terrell 
43*2aa14b1aSNick Terrell static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
44e0c1b49fSNick Terrell                      0,  1,  2,  3,  4,  5,  6,  7,
45e0c1b49fSNick Terrell                      8,  9, 10, 11, 12, 13, 14, 15,
46e0c1b49fSNick Terrell                     16, 17, 18, 19, 20, 21, 22, 23,
47e0c1b49fSNick Terrell                     24, 25, 26, 27, 28, 29, 30, 31 };
48e0c1b49fSNick Terrell 
49e0c1b49fSNick Terrell static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
50e0c1b49fSNick Terrell                      3,  4,  5,    6,     7,     8,     9,    10,
51e0c1b49fSNick Terrell                     11, 12, 13,   14,    15,    16,    17,    18,
52e0c1b49fSNick Terrell                     19, 20, 21,   22,    23,    24,    25,    26,
53e0c1b49fSNick Terrell                     27, 28, 29,   30,    31,    32,    33,    34,
54e0c1b49fSNick Terrell                     35, 37, 39,   41,    43,    47,    51,    59,
55e0c1b49fSNick Terrell                     67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
56e0c1b49fSNick Terrell                     0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
57e0c1b49fSNick Terrell 
58e0c1b49fSNick Terrell 
59e0c1b49fSNick Terrell /*-*******************************************************
60e0c1b49fSNick Terrell  *  Decompression types
61e0c1b49fSNick Terrell  *********************************************************/
62e0c1b49fSNick Terrell  typedef struct {
63e0c1b49fSNick Terrell      U32 fastMode;
64e0c1b49fSNick Terrell      U32 tableLog;
65e0c1b49fSNick Terrell  } ZSTD_seqSymbol_header;
66e0c1b49fSNick Terrell 
67e0c1b49fSNick Terrell  typedef struct {
68e0c1b49fSNick Terrell      U16  nextState;
69e0c1b49fSNick Terrell      BYTE nbAdditionalBits;
70e0c1b49fSNick Terrell      BYTE nbBits;
71e0c1b49fSNick Terrell      U32  baseValue;
72e0c1b49fSNick Terrell  } ZSTD_seqSymbol;
73e0c1b49fSNick Terrell 
74e0c1b49fSNick Terrell  #define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))
75e0c1b49fSNick Terrell 
76e0c1b49fSNick Terrell #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
77e0c1b49fSNick Terrell #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
78e0c1b49fSNick Terrell 
79e0c1b49fSNick Terrell typedef struct {
80e0c1b49fSNick Terrell     ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
81e0c1b49fSNick Terrell     ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
82e0c1b49fSNick Terrell     ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
83e0c1b49fSNick Terrell     HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
84e0c1b49fSNick Terrell     U32 rep[ZSTD_REP_NUM];
85e0c1b49fSNick Terrell     U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
86e0c1b49fSNick Terrell } ZSTD_entropyDTables_t;
87e0c1b49fSNick Terrell 
88e0c1b49fSNick Terrell typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
89e0c1b49fSNick Terrell                ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
90e0c1b49fSNick Terrell                ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
91e0c1b49fSNick Terrell                ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
92e0c1b49fSNick Terrell 
93e0c1b49fSNick Terrell typedef enum { zdss_init=0, zdss_loadHeader,
94e0c1b49fSNick Terrell                zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
95e0c1b49fSNick Terrell 
96e0c1b49fSNick Terrell typedef enum {
97e0c1b49fSNick Terrell     ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
98e0c1b49fSNick Terrell     ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists free it) */
99e0c1b49fSNick Terrell     ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
100e0c1b49fSNick Terrell } ZSTD_dictUses_e;
101e0c1b49fSNick Terrell 
102e0c1b49fSNick Terrell /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
103e0c1b49fSNick Terrell typedef struct {
104e0c1b49fSNick Terrell     const ZSTD_DDict** ddictPtrTable;
105e0c1b49fSNick Terrell     size_t ddictPtrTableSize;
106e0c1b49fSNick Terrell     size_t ddictPtrCount;
107e0c1b49fSNick Terrell } ZSTD_DDictHashSet;
108e0c1b49fSNick Terrell 
109*2aa14b1aSNick Terrell #ifndef ZSTD_DECODER_INTERNAL_BUFFER
110*2aa14b1aSNick Terrell #  define ZSTD_DECODER_INTERNAL_BUFFER  (1 << 16)
111*2aa14b1aSNick Terrell #endif
112*2aa14b1aSNick Terrell 
113*2aa14b1aSNick Terrell #define ZSTD_LBMIN 64
114*2aa14b1aSNick Terrell #define ZSTD_LBMAX (128 << 10)
115*2aa14b1aSNick Terrell 
116*2aa14b1aSNick Terrell /* extra buffer, compensates when dst is not large enough to store litBuffer */
117*2aa14b1aSNick Terrell #define ZSTD_LITBUFFEREXTRASIZE  BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
118*2aa14b1aSNick Terrell 
119*2aa14b1aSNick Terrell typedef enum {
120*2aa14b1aSNick Terrell     ZSTD_not_in_dst = 0,  /* Stored entirely within litExtraBuffer */
121*2aa14b1aSNick Terrell     ZSTD_in_dst = 1,           /* Stored entirely within dst (in memory after current output write) */
122*2aa14b1aSNick Terrell     ZSTD_split = 2            /* Split between litExtraBuffer and dst */
123*2aa14b1aSNick Terrell } ZSTD_litLocation_e;
124*2aa14b1aSNick Terrell 
125e0c1b49fSNick Terrell struct ZSTD_DCtx_s
126e0c1b49fSNick Terrell {
127e0c1b49fSNick Terrell     const ZSTD_seqSymbol* LLTptr;
128e0c1b49fSNick Terrell     const ZSTD_seqSymbol* MLTptr;
129e0c1b49fSNick Terrell     const ZSTD_seqSymbol* OFTptr;
130e0c1b49fSNick Terrell     const HUF_DTable* HUFptr;
131e0c1b49fSNick Terrell     ZSTD_entropyDTables_t entropy;
132e0c1b49fSNick Terrell     U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* space needed when building huffman tables */
133e0c1b49fSNick Terrell     const void* previousDstEnd;   /* detect continuity */
134e0c1b49fSNick Terrell     const void* prefixStart;      /* start of current segment */
135e0c1b49fSNick Terrell     const void* virtualStart;     /* virtual start of previous segment if it was just before current one */
136e0c1b49fSNick Terrell     const void* dictEnd;          /* end of previous segment */
137e0c1b49fSNick Terrell     size_t expected;
138e0c1b49fSNick Terrell     ZSTD_frameHeader fParams;
139e0c1b49fSNick Terrell     U64 processedCSize;
140e0c1b49fSNick Terrell     U64 decodedSize;
141e0c1b49fSNick Terrell     blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
142e0c1b49fSNick Terrell     ZSTD_dStage stage;
143e0c1b49fSNick Terrell     U32 litEntropy;
144e0c1b49fSNick Terrell     U32 fseEntropy;
145e0c1b49fSNick Terrell     struct xxh64_state xxhState;
146e0c1b49fSNick Terrell     size_t headerSize;
147e0c1b49fSNick Terrell     ZSTD_format_e format;
148e0c1b49fSNick Terrell     ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum;   /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
149e0c1b49fSNick Terrell     U32 validateChecksum;         /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
150e0c1b49fSNick Terrell     const BYTE* litPtr;
151e0c1b49fSNick Terrell     ZSTD_customMem customMem;
152e0c1b49fSNick Terrell     size_t litSize;
153e0c1b49fSNick Terrell     size_t rleSize;
154e0c1b49fSNick Terrell     size_t staticSize;
155*2aa14b1aSNick Terrell #if DYNAMIC_BMI2 != 0
156e0c1b49fSNick Terrell     int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
157*2aa14b1aSNick Terrell #endif
158e0c1b49fSNick Terrell 
159e0c1b49fSNick Terrell     /* dictionary */
160e0c1b49fSNick Terrell     ZSTD_DDict* ddictLocal;
161e0c1b49fSNick Terrell     const ZSTD_DDict* ddict;     /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
162e0c1b49fSNick Terrell     U32 dictID;
163e0c1b49fSNick Terrell     int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
164e0c1b49fSNick Terrell     ZSTD_dictUses_e dictUses;
165e0c1b49fSNick Terrell     ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
166e0c1b49fSNick Terrell     ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
167e0c1b49fSNick Terrell 
168e0c1b49fSNick Terrell     /* streaming */
169e0c1b49fSNick Terrell     ZSTD_dStreamStage streamStage;
170e0c1b49fSNick Terrell     char*  inBuff;
171e0c1b49fSNick Terrell     size_t inBuffSize;
172e0c1b49fSNick Terrell     size_t inPos;
173e0c1b49fSNick Terrell     size_t maxWindowSize;
174e0c1b49fSNick Terrell     char*  outBuff;
175e0c1b49fSNick Terrell     size_t outBuffSize;
176e0c1b49fSNick Terrell     size_t outStart;
177e0c1b49fSNick Terrell     size_t outEnd;
178e0c1b49fSNick Terrell     size_t lhSize;
179e0c1b49fSNick Terrell     U32 hostageByte;
180e0c1b49fSNick Terrell     int noForwardProgress;
181e0c1b49fSNick Terrell     ZSTD_bufferMode_e outBufferMode;
182e0c1b49fSNick Terrell     ZSTD_outBuffer expectedOutBuffer;
183e0c1b49fSNick Terrell 
184e0c1b49fSNick Terrell     /* workspace */
185*2aa14b1aSNick Terrell     BYTE* litBuffer;
186*2aa14b1aSNick Terrell     const BYTE* litBufferEnd;
187*2aa14b1aSNick Terrell     ZSTD_litLocation_e litBufferLocation;
188*2aa14b1aSNick Terrell     BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
189e0c1b49fSNick Terrell     BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
190e0c1b49fSNick Terrell 
191e0c1b49fSNick Terrell     size_t oversizedDuration;
192e0c1b49fSNick Terrell 
193e0c1b49fSNick Terrell #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
194e0c1b49fSNick Terrell     void const* dictContentBeginForFuzzing;
195e0c1b49fSNick Terrell     void const* dictContentEndForFuzzing;
196e0c1b49fSNick Terrell #endif
197e0c1b49fSNick Terrell 
198e0c1b49fSNick Terrell     /* Tracing */
199e0c1b49fSNick Terrell };  /* typedef'd to ZSTD_DCtx within "zstd.h" */
200e0c1b49fSNick Terrell 
ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s * dctx)201*2aa14b1aSNick Terrell MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
202*2aa14b1aSNick Terrell #if DYNAMIC_BMI2 != 0
203*2aa14b1aSNick Terrell 	return dctx->bmi2;
204*2aa14b1aSNick Terrell #else
205*2aa14b1aSNick Terrell     (void)dctx;
206*2aa14b1aSNick Terrell 	return 0;
207*2aa14b1aSNick Terrell #endif
208*2aa14b1aSNick Terrell }
209e0c1b49fSNick Terrell 
210e0c1b49fSNick Terrell /*-*******************************************************
211e0c1b49fSNick Terrell  *  Shared internal functions
212e0c1b49fSNick Terrell  *********************************************************/
213e0c1b49fSNick Terrell 
214e0c1b49fSNick Terrell /*! ZSTD_loadDEntropy() :
215e0c1b49fSNick Terrell  *  dict : must point at beginning of a valid zstd dictionary.
216e0c1b49fSNick Terrell  * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
217e0c1b49fSNick Terrell size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
218e0c1b49fSNick Terrell                    const void* const dict, size_t const dictSize);
219e0c1b49fSNick Terrell 
220e0c1b49fSNick Terrell /*! ZSTD_checkContinuity() :
221e0c1b49fSNick Terrell  *  check if next `dst` follows previous position, where decompression ended.
222e0c1b49fSNick Terrell  *  If yes, do nothing (continue on current segment).
223e0c1b49fSNick Terrell  *  If not, classify previous segment as "external dictionary", and start a new segment.
224e0c1b49fSNick Terrell  *  This function cannot fail. */
225e0c1b49fSNick Terrell void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
226e0c1b49fSNick Terrell 
227e0c1b49fSNick Terrell 
228e0c1b49fSNick Terrell #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
229