1e0c1b49fSNick Terrell /*
2e0c1b49fSNick Terrell * Copyright (c) Yann Collet, Facebook, Inc.
3e0c1b49fSNick Terrell * All rights reserved.
4e0c1b49fSNick Terrell *
5e0c1b49fSNick Terrell * This source code is licensed under both the BSD-style license (found in the
6e0c1b49fSNick Terrell * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7e0c1b49fSNick Terrell * in the COPYING file in the root directory of this source tree).
8e0c1b49fSNick Terrell * You may select, at your option, one of the above-listed licenses.
9e0c1b49fSNick Terrell */
10e0c1b49fSNick Terrell
11e0c1b49fSNick Terrell /* This header contains definitions
12e0c1b49fSNick Terrell * that shall **only** be used by modules within lib/compress.
13e0c1b49fSNick Terrell */
14e0c1b49fSNick Terrell
15e0c1b49fSNick Terrell #ifndef ZSTD_COMPRESS_H
16e0c1b49fSNick Terrell #define ZSTD_COMPRESS_H
17e0c1b49fSNick Terrell
18e0c1b49fSNick Terrell /*-*************************************
19e0c1b49fSNick Terrell * Dependencies
20e0c1b49fSNick Terrell ***************************************/
21e0c1b49fSNick Terrell #include "../common/zstd_internal.h"
22e0c1b49fSNick Terrell #include "zstd_cwksp.h"
23e0c1b49fSNick Terrell
24e0c1b49fSNick Terrell
25e0c1b49fSNick Terrell /*-*************************************
26e0c1b49fSNick Terrell * Constants
27e0c1b49fSNick Terrell ***************************************/
28e0c1b49fSNick Terrell #define kSearchStrength 8
29e0c1b49fSNick Terrell #define HASH_READ_SIZE 8
30e0c1b49fSNick Terrell #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
31e0c1b49fSNick Terrell It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
32e0c1b49fSNick Terrell It's not a big deal though : candidate will just be sorted again.
33e0c1b49fSNick Terrell Additionally, candidate position 1 will be lost.
34e0c1b49fSNick Terrell But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
35e0c1b49fSNick Terrell The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
36e0c1b49fSNick Terrell This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
37e0c1b49fSNick Terrell
38e0c1b49fSNick Terrell
39e0c1b49fSNick Terrell /*-*************************************
40e0c1b49fSNick Terrell * Context memory management
41e0c1b49fSNick Terrell ***************************************/
42e0c1b49fSNick Terrell typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
43e0c1b49fSNick Terrell typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
44e0c1b49fSNick Terrell
45e0c1b49fSNick Terrell typedef struct ZSTD_prefixDict_s {
46e0c1b49fSNick Terrell const void* dict;
47e0c1b49fSNick Terrell size_t dictSize;
48e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType;
49e0c1b49fSNick Terrell } ZSTD_prefixDict;
50e0c1b49fSNick Terrell
51e0c1b49fSNick Terrell typedef struct {
52e0c1b49fSNick Terrell void* dictBuffer;
53e0c1b49fSNick Terrell void const* dict;
54e0c1b49fSNick Terrell size_t dictSize;
55e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType;
56e0c1b49fSNick Terrell ZSTD_CDict* cdict;
57e0c1b49fSNick Terrell } ZSTD_localDict;
58e0c1b49fSNick Terrell
59e0c1b49fSNick Terrell typedef struct {
60*2aa14b1aSNick Terrell HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
61e0c1b49fSNick Terrell HUF_repeat repeatMode;
62e0c1b49fSNick Terrell } ZSTD_hufCTables_t;
63e0c1b49fSNick Terrell
64e0c1b49fSNick Terrell typedef struct {
65e0c1b49fSNick Terrell FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
66e0c1b49fSNick Terrell FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
67e0c1b49fSNick Terrell FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
68e0c1b49fSNick Terrell FSE_repeat offcode_repeatMode;
69e0c1b49fSNick Terrell FSE_repeat matchlength_repeatMode;
70e0c1b49fSNick Terrell FSE_repeat litlength_repeatMode;
71e0c1b49fSNick Terrell } ZSTD_fseCTables_t;
72e0c1b49fSNick Terrell
73e0c1b49fSNick Terrell typedef struct {
74e0c1b49fSNick Terrell ZSTD_hufCTables_t huf;
75e0c1b49fSNick Terrell ZSTD_fseCTables_t fse;
76e0c1b49fSNick Terrell } ZSTD_entropyCTables_t;
77e0c1b49fSNick Terrell
78*2aa14b1aSNick Terrell /* *********************************************
79*2aa14b1aSNick Terrell * Entropy buffer statistics structs and funcs *
80*2aa14b1aSNick Terrell ***********************************************/
81*2aa14b1aSNick Terrell /* ZSTD_hufCTablesMetadata_t :
82*2aa14b1aSNick Terrell * Stores Literals Block Type for a super-block in hType, and
83*2aa14b1aSNick Terrell * huffman tree description in hufDesBuffer.
84*2aa14b1aSNick Terrell * hufDesSize refers to the size of huffman tree description in bytes.
85*2aa14b1aSNick Terrell * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
86e0c1b49fSNick Terrell typedef struct {
87*2aa14b1aSNick Terrell symbolEncodingType_e hType;
88*2aa14b1aSNick Terrell BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
89*2aa14b1aSNick Terrell size_t hufDesSize;
90*2aa14b1aSNick Terrell } ZSTD_hufCTablesMetadata_t;
91*2aa14b1aSNick Terrell
92*2aa14b1aSNick Terrell /* ZSTD_fseCTablesMetadata_t :
93*2aa14b1aSNick Terrell * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
94*2aa14b1aSNick Terrell * fse tables in fseTablesBuffer.
95*2aa14b1aSNick Terrell * fseTablesSize refers to the size of fse tables in bytes.
96*2aa14b1aSNick Terrell * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
97*2aa14b1aSNick Terrell typedef struct {
98*2aa14b1aSNick Terrell symbolEncodingType_e llType;
99*2aa14b1aSNick Terrell symbolEncodingType_e ofType;
100*2aa14b1aSNick Terrell symbolEncodingType_e mlType;
101*2aa14b1aSNick Terrell BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
102*2aa14b1aSNick Terrell size_t fseTablesSize;
103*2aa14b1aSNick Terrell size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
104*2aa14b1aSNick Terrell } ZSTD_fseCTablesMetadata_t;
105*2aa14b1aSNick Terrell
106*2aa14b1aSNick Terrell typedef struct {
107*2aa14b1aSNick Terrell ZSTD_hufCTablesMetadata_t hufMetadata;
108*2aa14b1aSNick Terrell ZSTD_fseCTablesMetadata_t fseMetadata;
109*2aa14b1aSNick Terrell } ZSTD_entropyCTablesMetadata_t;
110*2aa14b1aSNick Terrell
111*2aa14b1aSNick Terrell /* ZSTD_buildBlockEntropyStats() :
112*2aa14b1aSNick Terrell * Builds entropy for the block.
113*2aa14b1aSNick Terrell * @return : 0 on success or error code */
114*2aa14b1aSNick Terrell size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
115*2aa14b1aSNick Terrell const ZSTD_entropyCTables_t* prevEntropy,
116*2aa14b1aSNick Terrell ZSTD_entropyCTables_t* nextEntropy,
117*2aa14b1aSNick Terrell const ZSTD_CCtx_params* cctxParams,
118*2aa14b1aSNick Terrell ZSTD_entropyCTablesMetadata_t* entropyMetadata,
119*2aa14b1aSNick Terrell void* workspace, size_t wkspSize);
120*2aa14b1aSNick Terrell
121*2aa14b1aSNick Terrell /* *******************************
122*2aa14b1aSNick Terrell * Compression internals structs *
123*2aa14b1aSNick Terrell *********************************/
124*2aa14b1aSNick Terrell
125*2aa14b1aSNick Terrell typedef struct {
126*2aa14b1aSNick Terrell U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
127e0c1b49fSNick Terrell U32 len; /* Raw length of match */
128e0c1b49fSNick Terrell } ZSTD_match_t;
129e0c1b49fSNick Terrell
130e0c1b49fSNick Terrell typedef struct {
131e0c1b49fSNick Terrell U32 offset; /* Offset of sequence */
132e0c1b49fSNick Terrell U32 litLength; /* Length of literals prior to match */
133e0c1b49fSNick Terrell U32 matchLength; /* Raw length of match */
134e0c1b49fSNick Terrell } rawSeq;
135e0c1b49fSNick Terrell
136e0c1b49fSNick Terrell typedef struct {
137e0c1b49fSNick Terrell rawSeq* seq; /* The start of the sequences */
138e0c1b49fSNick Terrell size_t pos; /* The index in seq where reading stopped. pos <= size. */
139e0c1b49fSNick Terrell size_t posInSequence; /* The position within the sequence at seq[pos] where reading
140e0c1b49fSNick Terrell stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
141e0c1b49fSNick Terrell size_t size; /* The number of sequences. <= capacity. */
142e0c1b49fSNick Terrell size_t capacity; /* The capacity starting from `seq` pointer */
143e0c1b49fSNick Terrell } rawSeqStore_t;
144e0c1b49fSNick Terrell
145e0c1b49fSNick Terrell UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
146e0c1b49fSNick Terrell
147e0c1b49fSNick Terrell typedef struct {
148e0c1b49fSNick Terrell int price;
149e0c1b49fSNick Terrell U32 off;
150e0c1b49fSNick Terrell U32 mlen;
151e0c1b49fSNick Terrell U32 litlen;
152e0c1b49fSNick Terrell U32 rep[ZSTD_REP_NUM];
153e0c1b49fSNick Terrell } ZSTD_optimal_t;
154e0c1b49fSNick Terrell
155e0c1b49fSNick Terrell typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
156e0c1b49fSNick Terrell
157e0c1b49fSNick Terrell typedef struct {
158e0c1b49fSNick Terrell /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
159e0c1b49fSNick Terrell unsigned* litFreq; /* table of literals statistics, of size 256 */
160e0c1b49fSNick Terrell unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
161e0c1b49fSNick Terrell unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
162e0c1b49fSNick Terrell unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
163e0c1b49fSNick Terrell ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
164e0c1b49fSNick Terrell ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
165e0c1b49fSNick Terrell
166e0c1b49fSNick Terrell U32 litSum; /* nb of literals */
167e0c1b49fSNick Terrell U32 litLengthSum; /* nb of litLength codes */
168e0c1b49fSNick Terrell U32 matchLengthSum; /* nb of matchLength codes */
169e0c1b49fSNick Terrell U32 offCodeSum; /* nb of offset codes */
170e0c1b49fSNick Terrell U32 litSumBasePrice; /* to compare to log2(litfreq) */
171e0c1b49fSNick Terrell U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */
172e0c1b49fSNick Terrell U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */
173e0c1b49fSNick Terrell U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
174e0c1b49fSNick Terrell ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
175e0c1b49fSNick Terrell const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
176*2aa14b1aSNick Terrell ZSTD_paramSwitch_e literalCompressionMode;
177e0c1b49fSNick Terrell } optState_t;
178e0c1b49fSNick Terrell
179e0c1b49fSNick Terrell typedef struct {
180e0c1b49fSNick Terrell ZSTD_entropyCTables_t entropy;
181e0c1b49fSNick Terrell U32 rep[ZSTD_REP_NUM];
182e0c1b49fSNick Terrell } ZSTD_compressedBlockState_t;
183e0c1b49fSNick Terrell
184e0c1b49fSNick Terrell typedef struct {
185e0c1b49fSNick Terrell BYTE const* nextSrc; /* next block here to continue on current prefix */
186e0c1b49fSNick Terrell BYTE const* base; /* All regular indexes relative to this position */
187e0c1b49fSNick Terrell BYTE const* dictBase; /* extDict indexes relative to this position */
188e0c1b49fSNick Terrell U32 dictLimit; /* below that point, need extDict */
189e0c1b49fSNick Terrell U32 lowLimit; /* below that point, no more valid data */
190*2aa14b1aSNick Terrell U32 nbOverflowCorrections; /* Number of times overflow correction has run since
191*2aa14b1aSNick Terrell * ZSTD_window_init(). Useful for debugging coredumps
192*2aa14b1aSNick Terrell * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
193*2aa14b1aSNick Terrell */
194e0c1b49fSNick Terrell } ZSTD_window_t;
195e0c1b49fSNick Terrell
196*2aa14b1aSNick Terrell #define ZSTD_WINDOW_START_INDEX 2
197*2aa14b1aSNick Terrell
198e0c1b49fSNick Terrell typedef struct ZSTD_matchState_t ZSTD_matchState_t;
199*2aa14b1aSNick Terrell
200*2aa14b1aSNick Terrell #define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
201*2aa14b1aSNick Terrell
202e0c1b49fSNick Terrell struct ZSTD_matchState_t {
203e0c1b49fSNick Terrell ZSTD_window_t window; /* State for window round buffer management */
204e0c1b49fSNick Terrell U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
205e0c1b49fSNick Terrell * When loadedDictEnd != 0, a dictionary is in use, and still valid.
206e0c1b49fSNick Terrell * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
207e0c1b49fSNick Terrell * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
208e0c1b49fSNick Terrell * When dict referential is copied into active context (i.e. not attached),
209e0c1b49fSNick Terrell * loadedDictEnd == dictSize, since referential starts from zero.
210e0c1b49fSNick Terrell */
211e0c1b49fSNick Terrell U32 nextToUpdate; /* index from which to continue table update */
212e0c1b49fSNick Terrell U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
213*2aa14b1aSNick Terrell
214*2aa14b1aSNick Terrell U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
215*2aa14b1aSNick Terrell U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
216*2aa14b1aSNick Terrell U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
217*2aa14b1aSNick Terrell
218e0c1b49fSNick Terrell U32* hashTable;
219e0c1b49fSNick Terrell U32* hashTable3;
220e0c1b49fSNick Terrell U32* chainTable;
221*2aa14b1aSNick Terrell
222*2aa14b1aSNick Terrell U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
223*2aa14b1aSNick Terrell
224e0c1b49fSNick Terrell int dedicatedDictSearch; /* Indicates whether this matchState is using the
225e0c1b49fSNick Terrell * dedicated dictionary search structure.
226e0c1b49fSNick Terrell */
227e0c1b49fSNick Terrell optState_t opt; /* optimal parser state */
228e0c1b49fSNick Terrell const ZSTD_matchState_t* dictMatchState;
229e0c1b49fSNick Terrell ZSTD_compressionParameters cParams;
230e0c1b49fSNick Terrell const rawSeqStore_t* ldmSeqStore;
231e0c1b49fSNick Terrell };
232e0c1b49fSNick Terrell
233e0c1b49fSNick Terrell typedef struct {
234e0c1b49fSNick Terrell ZSTD_compressedBlockState_t* prevCBlock;
235e0c1b49fSNick Terrell ZSTD_compressedBlockState_t* nextCBlock;
236e0c1b49fSNick Terrell ZSTD_matchState_t matchState;
237e0c1b49fSNick Terrell } ZSTD_blockState_t;
238e0c1b49fSNick Terrell
239e0c1b49fSNick Terrell typedef struct {
240e0c1b49fSNick Terrell U32 offset;
241e0c1b49fSNick Terrell U32 checksum;
242e0c1b49fSNick Terrell } ldmEntry_t;
243e0c1b49fSNick Terrell
244e0c1b49fSNick Terrell typedef struct {
245e0c1b49fSNick Terrell BYTE const* split;
246e0c1b49fSNick Terrell U32 hash;
247e0c1b49fSNick Terrell U32 checksum;
248e0c1b49fSNick Terrell ldmEntry_t* bucket;
249e0c1b49fSNick Terrell } ldmMatchCandidate_t;
250e0c1b49fSNick Terrell
251e0c1b49fSNick Terrell #define LDM_BATCH_SIZE 64
252e0c1b49fSNick Terrell
253e0c1b49fSNick Terrell typedef struct {
254e0c1b49fSNick Terrell ZSTD_window_t window; /* State for the window round buffer management */
255e0c1b49fSNick Terrell ldmEntry_t* hashTable;
256e0c1b49fSNick Terrell U32 loadedDictEnd;
257e0c1b49fSNick Terrell BYTE* bucketOffsets; /* Next position in bucket to insert entry */
258e0c1b49fSNick Terrell size_t splitIndices[LDM_BATCH_SIZE];
259e0c1b49fSNick Terrell ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
260e0c1b49fSNick Terrell } ldmState_t;
261e0c1b49fSNick Terrell
262e0c1b49fSNick Terrell typedef struct {
263*2aa14b1aSNick Terrell ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
264e0c1b49fSNick Terrell U32 hashLog; /* Log size of hashTable */
265e0c1b49fSNick Terrell U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
266e0c1b49fSNick Terrell U32 minMatchLength; /* Minimum match length */
267e0c1b49fSNick Terrell U32 hashRateLog; /* Log number of entries to skip */
268e0c1b49fSNick Terrell U32 windowLog; /* Window log for the LDM */
269e0c1b49fSNick Terrell } ldmParams_t;
270e0c1b49fSNick Terrell
271e0c1b49fSNick Terrell typedef struct {
272e0c1b49fSNick Terrell int collectSequences;
273e0c1b49fSNick Terrell ZSTD_Sequence* seqStart;
274e0c1b49fSNick Terrell size_t seqIndex;
275e0c1b49fSNick Terrell size_t maxSequences;
276e0c1b49fSNick Terrell } SeqCollector;
277e0c1b49fSNick Terrell
278e0c1b49fSNick Terrell struct ZSTD_CCtx_params_s {
279e0c1b49fSNick Terrell ZSTD_format_e format;
280e0c1b49fSNick Terrell ZSTD_compressionParameters cParams;
281e0c1b49fSNick Terrell ZSTD_frameParameters fParams;
282e0c1b49fSNick Terrell
283e0c1b49fSNick Terrell int compressionLevel;
284e0c1b49fSNick Terrell int forceWindow; /* force back-references to respect limit of
285e0c1b49fSNick Terrell * 1<<wLog, even for dictionary */
286e0c1b49fSNick Terrell size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
287e0c1b49fSNick Terrell * No target when targetCBlockSize == 0.
288e0c1b49fSNick Terrell * There is no guarantee on compressed block size */
289e0c1b49fSNick Terrell int srcSizeHint; /* User's best guess of source size.
290e0c1b49fSNick Terrell * Hint is not valid when srcSizeHint == 0.
291e0c1b49fSNick Terrell * There is no guarantee that hint is close to actual source size */
292e0c1b49fSNick Terrell
293e0c1b49fSNick Terrell ZSTD_dictAttachPref_e attachDictPref;
294*2aa14b1aSNick Terrell ZSTD_paramSwitch_e literalCompressionMode;
295e0c1b49fSNick Terrell
296e0c1b49fSNick Terrell /* Multithreading: used to pass parameters to mtctx */
297e0c1b49fSNick Terrell int nbWorkers;
298e0c1b49fSNick Terrell size_t jobSize;
299e0c1b49fSNick Terrell int overlapLog;
300e0c1b49fSNick Terrell int rsyncable;
301e0c1b49fSNick Terrell
302e0c1b49fSNick Terrell /* Long distance matching parameters */
303e0c1b49fSNick Terrell ldmParams_t ldmParams;
304e0c1b49fSNick Terrell
305e0c1b49fSNick Terrell /* Dedicated dict search algorithm trigger */
306e0c1b49fSNick Terrell int enableDedicatedDictSearch;
307e0c1b49fSNick Terrell
308e0c1b49fSNick Terrell /* Input/output buffer modes */
309e0c1b49fSNick Terrell ZSTD_bufferMode_e inBufferMode;
310e0c1b49fSNick Terrell ZSTD_bufferMode_e outBufferMode;
311e0c1b49fSNick Terrell
312e0c1b49fSNick Terrell /* Sequence compression API */
313e0c1b49fSNick Terrell ZSTD_sequenceFormat_e blockDelimiters;
314e0c1b49fSNick Terrell int validateSequences;
315e0c1b49fSNick Terrell
316*2aa14b1aSNick Terrell /* Block splitting */
317*2aa14b1aSNick Terrell ZSTD_paramSwitch_e useBlockSplitter;
318*2aa14b1aSNick Terrell
319*2aa14b1aSNick Terrell /* Param for deciding whether to use row-based matchfinder */
320*2aa14b1aSNick Terrell ZSTD_paramSwitch_e useRowMatchFinder;
321*2aa14b1aSNick Terrell
322*2aa14b1aSNick Terrell /* Always load a dictionary in ext-dict mode (not prefix mode)? */
323*2aa14b1aSNick Terrell int deterministicRefPrefix;
324*2aa14b1aSNick Terrell
325e0c1b49fSNick Terrell /* Internal use, for createCCtxParams() and freeCCtxParams() only */
326e0c1b49fSNick Terrell ZSTD_customMem customMem;
327e0c1b49fSNick Terrell }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
328e0c1b49fSNick Terrell
329e0c1b49fSNick Terrell #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
330e0c1b49fSNick Terrell #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
331e0c1b49fSNick Terrell
332e0c1b49fSNick Terrell /*
333e0c1b49fSNick Terrell * Indicates whether this compression proceeds directly from user-provided
334e0c1b49fSNick Terrell * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
335e0c1b49fSNick Terrell * whether the context needs to buffer the input/output (ZSTDb_buffered).
336e0c1b49fSNick Terrell */
337e0c1b49fSNick Terrell typedef enum {
338e0c1b49fSNick Terrell ZSTDb_not_buffered,
339e0c1b49fSNick Terrell ZSTDb_buffered
340e0c1b49fSNick Terrell } ZSTD_buffered_policy_e;
341e0c1b49fSNick Terrell
342*2aa14b1aSNick Terrell /*
343*2aa14b1aSNick Terrell * Struct that contains all elements of block splitter that should be allocated
344*2aa14b1aSNick Terrell * in a wksp.
345*2aa14b1aSNick Terrell */
346*2aa14b1aSNick Terrell #define ZSTD_MAX_NB_BLOCK_SPLITS 196
347*2aa14b1aSNick Terrell typedef struct {
348*2aa14b1aSNick Terrell seqStore_t fullSeqStoreChunk;
349*2aa14b1aSNick Terrell seqStore_t firstHalfSeqStore;
350*2aa14b1aSNick Terrell seqStore_t secondHalfSeqStore;
351*2aa14b1aSNick Terrell seqStore_t currSeqStore;
352*2aa14b1aSNick Terrell seqStore_t nextSeqStore;
353*2aa14b1aSNick Terrell
354*2aa14b1aSNick Terrell U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
355*2aa14b1aSNick Terrell ZSTD_entropyCTablesMetadata_t entropyMetadata;
356*2aa14b1aSNick Terrell } ZSTD_blockSplitCtx;
357*2aa14b1aSNick Terrell
358e0c1b49fSNick Terrell struct ZSTD_CCtx_s {
359e0c1b49fSNick Terrell ZSTD_compressionStage_e stage;
360e0c1b49fSNick Terrell int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
361e0c1b49fSNick Terrell int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
362e0c1b49fSNick Terrell ZSTD_CCtx_params requestedParams;
363e0c1b49fSNick Terrell ZSTD_CCtx_params appliedParams;
364*2aa14b1aSNick Terrell ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
365e0c1b49fSNick Terrell U32 dictID;
366e0c1b49fSNick Terrell size_t dictContentSize;
367e0c1b49fSNick Terrell
368e0c1b49fSNick Terrell ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
369e0c1b49fSNick Terrell size_t blockSize;
370e0c1b49fSNick Terrell unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
371e0c1b49fSNick Terrell unsigned long long consumedSrcSize;
372e0c1b49fSNick Terrell unsigned long long producedCSize;
373e0c1b49fSNick Terrell struct xxh64_state xxhState;
374e0c1b49fSNick Terrell ZSTD_customMem customMem;
375e0c1b49fSNick Terrell ZSTD_threadPool* pool;
376e0c1b49fSNick Terrell size_t staticSize;
377e0c1b49fSNick Terrell SeqCollector seqCollector;
378e0c1b49fSNick Terrell int isFirstBlock;
379e0c1b49fSNick Terrell int initialized;
380e0c1b49fSNick Terrell
381e0c1b49fSNick Terrell seqStore_t seqStore; /* sequences storage ptrs */
382e0c1b49fSNick Terrell ldmState_t ldmState; /* long distance matching state */
383e0c1b49fSNick Terrell rawSeq* ldmSequences; /* Storage for the ldm output sequences */
384e0c1b49fSNick Terrell size_t maxNbLdmSequences;
385e0c1b49fSNick Terrell rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
386e0c1b49fSNick Terrell ZSTD_blockState_t blockState;
387e0c1b49fSNick Terrell U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
388e0c1b49fSNick Terrell
389*2aa14b1aSNick Terrell /* Whether we are streaming or not */
390e0c1b49fSNick Terrell ZSTD_buffered_policy_e bufferedPolicy;
391e0c1b49fSNick Terrell
392e0c1b49fSNick Terrell /* streaming */
393e0c1b49fSNick Terrell char* inBuff;
394e0c1b49fSNick Terrell size_t inBuffSize;
395e0c1b49fSNick Terrell size_t inToCompress;
396e0c1b49fSNick Terrell size_t inBuffPos;
397e0c1b49fSNick Terrell size_t inBuffTarget;
398e0c1b49fSNick Terrell char* outBuff;
399e0c1b49fSNick Terrell size_t outBuffSize;
400e0c1b49fSNick Terrell size_t outBuffContentSize;
401e0c1b49fSNick Terrell size_t outBuffFlushedSize;
402e0c1b49fSNick Terrell ZSTD_cStreamStage streamStage;
403e0c1b49fSNick Terrell U32 frameEnded;
404e0c1b49fSNick Terrell
405e0c1b49fSNick Terrell /* Stable in/out buffer verification */
406e0c1b49fSNick Terrell ZSTD_inBuffer expectedInBuffer;
407e0c1b49fSNick Terrell size_t expectedOutBufferSize;
408e0c1b49fSNick Terrell
409e0c1b49fSNick Terrell /* Dictionary */
410e0c1b49fSNick Terrell ZSTD_localDict localDict;
411e0c1b49fSNick Terrell const ZSTD_CDict* cdict;
412e0c1b49fSNick Terrell ZSTD_prefixDict prefixDict; /* single-usage dictionary */
413e0c1b49fSNick Terrell
414e0c1b49fSNick Terrell /* Multi-threading */
415e0c1b49fSNick Terrell
416e0c1b49fSNick Terrell /* Tracing */
417*2aa14b1aSNick Terrell
418*2aa14b1aSNick Terrell /* Workspace for block splitter */
419*2aa14b1aSNick Terrell ZSTD_blockSplitCtx blockSplitCtx;
420e0c1b49fSNick Terrell };
421e0c1b49fSNick Terrell
422e0c1b49fSNick Terrell typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
423e0c1b49fSNick Terrell
424e0c1b49fSNick Terrell typedef enum {
425e0c1b49fSNick Terrell ZSTD_noDict = 0,
426e0c1b49fSNick Terrell ZSTD_extDict = 1,
427e0c1b49fSNick Terrell ZSTD_dictMatchState = 2,
428e0c1b49fSNick Terrell ZSTD_dedicatedDictSearch = 3
429e0c1b49fSNick Terrell } ZSTD_dictMode_e;
430e0c1b49fSNick Terrell
431e0c1b49fSNick Terrell typedef enum {
432e0c1b49fSNick Terrell ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict.
433e0c1b49fSNick Terrell * In this mode we use both the srcSize and the dictSize
434e0c1b49fSNick Terrell * when selecting and adjusting parameters.
435e0c1b49fSNick Terrell */
436e0c1b49fSNick Terrell ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
437e0c1b49fSNick Terrell * In this mode we only take the srcSize into account when selecting
438e0c1b49fSNick Terrell * and adjusting parameters.
439e0c1b49fSNick Terrell */
440e0c1b49fSNick Terrell ZSTD_cpm_createCDict = 2, /* Creating a CDict.
441e0c1b49fSNick Terrell * In this mode we take both the source size and the dictionary size
442e0c1b49fSNick Terrell * into account when selecting and adjusting the parameters.
443e0c1b49fSNick Terrell */
444e0c1b49fSNick Terrell ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
445e0c1b49fSNick Terrell * We don't know what these parameters are for. We default to the legacy
446e0c1b49fSNick Terrell * behavior of taking both the source size and the dict size into account
447e0c1b49fSNick Terrell * when selecting and adjusting parameters.
448e0c1b49fSNick Terrell */
449e0c1b49fSNick Terrell } ZSTD_cParamMode_e;
450e0c1b49fSNick Terrell
451e0c1b49fSNick Terrell typedef size_t (*ZSTD_blockCompressor) (
452e0c1b49fSNick Terrell ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
453e0c1b49fSNick Terrell void const* src, size_t srcSize);
454*2aa14b1aSNick Terrell ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
455e0c1b49fSNick Terrell
456e0c1b49fSNick Terrell
ZSTD_LLcode(U32 litLength)457e0c1b49fSNick Terrell MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
458e0c1b49fSNick Terrell {
459e0c1b49fSNick Terrell static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
460e0c1b49fSNick Terrell 8, 9, 10, 11, 12, 13, 14, 15,
461e0c1b49fSNick Terrell 16, 16, 17, 17, 18, 18, 19, 19,
462e0c1b49fSNick Terrell 20, 20, 20, 20, 21, 21, 21, 21,
463e0c1b49fSNick Terrell 22, 22, 22, 22, 22, 22, 22, 22,
464e0c1b49fSNick Terrell 23, 23, 23, 23, 23, 23, 23, 23,
465e0c1b49fSNick Terrell 24, 24, 24, 24, 24, 24, 24, 24,
466e0c1b49fSNick Terrell 24, 24, 24, 24, 24, 24, 24, 24 };
467e0c1b49fSNick Terrell static const U32 LL_deltaCode = 19;
468e0c1b49fSNick Terrell return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
469e0c1b49fSNick Terrell }
470e0c1b49fSNick Terrell
471e0c1b49fSNick Terrell /* ZSTD_MLcode() :
472e0c1b49fSNick Terrell * note : mlBase = matchLength - MINMATCH;
473e0c1b49fSNick Terrell * because it's the format it's stored in seqStore->sequences */
ZSTD_MLcode(U32 mlBase)474e0c1b49fSNick Terrell MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
475e0c1b49fSNick Terrell {
476e0c1b49fSNick Terrell static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
477e0c1b49fSNick Terrell 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
478e0c1b49fSNick Terrell 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
479e0c1b49fSNick Terrell 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
480e0c1b49fSNick Terrell 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
481e0c1b49fSNick Terrell 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
482e0c1b49fSNick Terrell 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
483e0c1b49fSNick Terrell 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
484e0c1b49fSNick Terrell static const U32 ML_deltaCode = 36;
485e0c1b49fSNick Terrell return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
486e0c1b49fSNick Terrell }
487e0c1b49fSNick Terrell
488e0c1b49fSNick Terrell /* ZSTD_cParam_withinBounds:
489e0c1b49fSNick Terrell * @return 1 if value is within cParam bounds,
490e0c1b49fSNick Terrell * 0 otherwise */
ZSTD_cParam_withinBounds(ZSTD_cParameter cParam,int value)491e0c1b49fSNick Terrell MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
492e0c1b49fSNick Terrell {
493e0c1b49fSNick Terrell ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
494e0c1b49fSNick Terrell if (ZSTD_isError(bounds.error)) return 0;
495e0c1b49fSNick Terrell if (value < bounds.lowerBound) return 0;
496e0c1b49fSNick Terrell if (value > bounds.upperBound) return 0;
497e0c1b49fSNick Terrell return 1;
498e0c1b49fSNick Terrell }
499e0c1b49fSNick Terrell
500e0c1b49fSNick Terrell /* ZSTD_noCompressBlock() :
501e0c1b49fSNick Terrell * Writes uncompressed block to dst buffer from given src.
502e0c1b49fSNick Terrell * Returns the size of the block */
ZSTD_noCompressBlock(void * dst,size_t dstCapacity,const void * src,size_t srcSize,U32 lastBlock)503e0c1b49fSNick Terrell MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
504e0c1b49fSNick Terrell {
505e0c1b49fSNick Terrell U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
506e0c1b49fSNick Terrell RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
507e0c1b49fSNick Terrell dstSize_tooSmall, "dst buf too small for uncompressed block");
508e0c1b49fSNick Terrell MEM_writeLE24(dst, cBlockHeader24);
509e0c1b49fSNick Terrell ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
510e0c1b49fSNick Terrell return ZSTD_blockHeaderSize + srcSize;
511e0c1b49fSNick Terrell }
512e0c1b49fSNick Terrell
ZSTD_rleCompressBlock(void * dst,size_t dstCapacity,BYTE src,size_t srcSize,U32 lastBlock)513e0c1b49fSNick Terrell MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
514e0c1b49fSNick Terrell {
515e0c1b49fSNick Terrell BYTE* const op = (BYTE*)dst;
516e0c1b49fSNick Terrell U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
517e0c1b49fSNick Terrell RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
518e0c1b49fSNick Terrell MEM_writeLE24(op, cBlockHeader);
519e0c1b49fSNick Terrell op[3] = src;
520e0c1b49fSNick Terrell return 4;
521e0c1b49fSNick Terrell }
522e0c1b49fSNick Terrell
523e0c1b49fSNick Terrell
524e0c1b49fSNick Terrell /* ZSTD_minGain() :
525e0c1b49fSNick Terrell * minimum compression required
526e0c1b49fSNick Terrell * to generate a compress block or a compressed literals section.
527e0c1b49fSNick Terrell * note : use same formula for both situations */
ZSTD_minGain(size_t srcSize,ZSTD_strategy strat)528e0c1b49fSNick Terrell MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
529e0c1b49fSNick Terrell {
530e0c1b49fSNick Terrell U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
531e0c1b49fSNick Terrell ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
532e0c1b49fSNick Terrell assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
533e0c1b49fSNick Terrell return (srcSize >> minlog) + 2;
534e0c1b49fSNick Terrell }
535e0c1b49fSNick Terrell
ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params * cctxParams)536*2aa14b1aSNick Terrell MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
537e0c1b49fSNick Terrell {
538e0c1b49fSNick Terrell switch (cctxParams->literalCompressionMode) {
539*2aa14b1aSNick Terrell case ZSTD_ps_enable:
540e0c1b49fSNick Terrell return 0;
541*2aa14b1aSNick Terrell case ZSTD_ps_disable:
542e0c1b49fSNick Terrell return 1;
543e0c1b49fSNick Terrell default:
544e0c1b49fSNick Terrell assert(0 /* impossible: pre-validated */);
545e0c1b49fSNick Terrell ZSTD_FALLTHROUGH;
546*2aa14b1aSNick Terrell case ZSTD_ps_auto:
547e0c1b49fSNick Terrell return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
548e0c1b49fSNick Terrell }
549e0c1b49fSNick Terrell }
550e0c1b49fSNick Terrell
551e0c1b49fSNick Terrell /*! ZSTD_safecopyLiterals() :
552e0c1b49fSNick Terrell * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
553e0c1b49fSNick Terrell * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
554e0c1b49fSNick Terrell * large copies.
555e0c1b49fSNick Terrell */
556*2aa14b1aSNick Terrell static void
ZSTD_safecopyLiterals(BYTE * op,BYTE const * ip,BYTE const * const iend,BYTE const * ilimit_w)557*2aa14b1aSNick Terrell ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
558*2aa14b1aSNick Terrell {
559e0c1b49fSNick Terrell assert(iend > ilimit_w);
560e0c1b49fSNick Terrell if (ip <= ilimit_w) {
561e0c1b49fSNick Terrell ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
562e0c1b49fSNick Terrell op += ilimit_w - ip;
563e0c1b49fSNick Terrell ip = ilimit_w;
564e0c1b49fSNick Terrell }
565e0c1b49fSNick Terrell while (ip < iend) *op++ = *ip++;
566e0c1b49fSNick Terrell }
567e0c1b49fSNick Terrell
568*2aa14b1aSNick Terrell #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
569*2aa14b1aSNick Terrell #define STORE_REPCODE_1 STORE_REPCODE(1)
570*2aa14b1aSNick Terrell #define STORE_REPCODE_2 STORE_REPCODE(2)
571*2aa14b1aSNick Terrell #define STORE_REPCODE_3 STORE_REPCODE(3)
572*2aa14b1aSNick Terrell #define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
573*2aa14b1aSNick Terrell #define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE)
574*2aa14b1aSNick Terrell #define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE)
575*2aa14b1aSNick Terrell #define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
576*2aa14b1aSNick Terrell #define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
577*2aa14b1aSNick Terrell #define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */
578*2aa14b1aSNick Terrell #define STORED_TO_OFFBASE(o) ((o)+1)
579*2aa14b1aSNick Terrell #define OFFBASE_TO_STORED(o) ((o)-1)
580*2aa14b1aSNick Terrell
581e0c1b49fSNick Terrell /*! ZSTD_storeSeq() :
582*2aa14b1aSNick Terrell * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
583*2aa14b1aSNick Terrell * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
584*2aa14b1aSNick Terrell * @matchLength : must be >= MINMATCH
585e0c1b49fSNick Terrell * Allowed to overread literals up to litLimit.
586e0c1b49fSNick Terrell */
587*2aa14b1aSNick Terrell HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t * seqStorePtr,size_t litLength,const BYTE * literals,const BYTE * litLimit,U32 offBase_minus1,size_t matchLength)588*2aa14b1aSNick Terrell ZSTD_storeSeq(seqStore_t* seqStorePtr,
589*2aa14b1aSNick Terrell size_t litLength, const BYTE* literals, const BYTE* litLimit,
590*2aa14b1aSNick Terrell U32 offBase_minus1,
591*2aa14b1aSNick Terrell size_t matchLength)
592e0c1b49fSNick Terrell {
593e0c1b49fSNick Terrell BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
594e0c1b49fSNick Terrell BYTE const* const litEnd = literals + litLength;
595e0c1b49fSNick Terrell #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
596e0c1b49fSNick Terrell static const BYTE* g_start = NULL;
597e0c1b49fSNick Terrell if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
598e0c1b49fSNick Terrell { U32 const pos = (U32)((const BYTE*)literals - g_start);
599e0c1b49fSNick Terrell DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
600*2aa14b1aSNick Terrell pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
601e0c1b49fSNick Terrell }
602e0c1b49fSNick Terrell #endif
603e0c1b49fSNick Terrell assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
604e0c1b49fSNick Terrell /* copy Literals */
605e0c1b49fSNick Terrell assert(seqStorePtr->maxNbLit <= 128 KB);
606e0c1b49fSNick Terrell assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
607e0c1b49fSNick Terrell assert(literals + litLength <= litLimit);
608e0c1b49fSNick Terrell if (litEnd <= litLimit_w) {
609e0c1b49fSNick Terrell /* Common case we can use wildcopy.
610e0c1b49fSNick Terrell * First copy 16 bytes, because literals are likely short.
611e0c1b49fSNick Terrell */
612e0c1b49fSNick Terrell assert(WILDCOPY_OVERLENGTH >= 16);
613e0c1b49fSNick Terrell ZSTD_copy16(seqStorePtr->lit, literals);
614e0c1b49fSNick Terrell if (litLength > 16) {
615e0c1b49fSNick Terrell ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
616e0c1b49fSNick Terrell }
617e0c1b49fSNick Terrell } else {
618e0c1b49fSNick Terrell ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
619e0c1b49fSNick Terrell }
620e0c1b49fSNick Terrell seqStorePtr->lit += litLength;
621e0c1b49fSNick Terrell
622e0c1b49fSNick Terrell /* literal Length */
623e0c1b49fSNick Terrell if (litLength>0xFFFF) {
624*2aa14b1aSNick Terrell assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
625*2aa14b1aSNick Terrell seqStorePtr->longLengthType = ZSTD_llt_literalLength;
626e0c1b49fSNick Terrell seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
627e0c1b49fSNick Terrell }
628e0c1b49fSNick Terrell seqStorePtr->sequences[0].litLength = (U16)litLength;
629e0c1b49fSNick Terrell
630e0c1b49fSNick Terrell /* match offset */
631*2aa14b1aSNick Terrell seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
632e0c1b49fSNick Terrell
633e0c1b49fSNick Terrell /* match Length */
634*2aa14b1aSNick Terrell assert(matchLength >= MINMATCH);
635*2aa14b1aSNick Terrell { size_t const mlBase = matchLength - MINMATCH;
636e0c1b49fSNick Terrell if (mlBase>0xFFFF) {
637*2aa14b1aSNick Terrell assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
638*2aa14b1aSNick Terrell seqStorePtr->longLengthType = ZSTD_llt_matchLength;
639e0c1b49fSNick Terrell seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
640e0c1b49fSNick Terrell }
641*2aa14b1aSNick Terrell seqStorePtr->sequences[0].mlBase = (U16)mlBase;
642*2aa14b1aSNick Terrell }
643e0c1b49fSNick Terrell
644e0c1b49fSNick Terrell seqStorePtr->sequences++;
645e0c1b49fSNick Terrell }
646e0c1b49fSNick Terrell
647*2aa14b1aSNick Terrell /* ZSTD_updateRep() :
648*2aa14b1aSNick Terrell * updates in-place @rep (array of repeat offsets)
649*2aa14b1aSNick Terrell * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
650*2aa14b1aSNick Terrell */
651*2aa14b1aSNick Terrell MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM],U32 const offBase_minus1,U32 const ll0)652*2aa14b1aSNick Terrell ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
653*2aa14b1aSNick Terrell {
654*2aa14b1aSNick Terrell if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */
655*2aa14b1aSNick Terrell rep[2] = rep[1];
656*2aa14b1aSNick Terrell rep[1] = rep[0];
657*2aa14b1aSNick Terrell rep[0] = STORED_OFFSET(offBase_minus1);
658*2aa14b1aSNick Terrell } else { /* repcode */
659*2aa14b1aSNick Terrell U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
660*2aa14b1aSNick Terrell if (repCode > 0) { /* note : if repCode==0, no change */
661*2aa14b1aSNick Terrell U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
662*2aa14b1aSNick Terrell rep[2] = (repCode >= 2) ? rep[1] : rep[2];
663*2aa14b1aSNick Terrell rep[1] = rep[0];
664*2aa14b1aSNick Terrell rep[0] = currentOffset;
665*2aa14b1aSNick Terrell } else { /* repCode == 0 */
666*2aa14b1aSNick Terrell /* nothing to do */
667*2aa14b1aSNick Terrell }
668*2aa14b1aSNick Terrell }
669*2aa14b1aSNick Terrell }
670*2aa14b1aSNick Terrell
671*2aa14b1aSNick Terrell typedef struct repcodes_s {
672*2aa14b1aSNick Terrell U32 rep[3];
673*2aa14b1aSNick Terrell } repcodes_t;
674*2aa14b1aSNick Terrell
675*2aa14b1aSNick Terrell MEM_STATIC repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM],U32 const offBase_minus1,U32 const ll0)676*2aa14b1aSNick Terrell ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
677*2aa14b1aSNick Terrell {
678*2aa14b1aSNick Terrell repcodes_t newReps;
679*2aa14b1aSNick Terrell ZSTD_memcpy(&newReps, rep, sizeof(newReps));
680*2aa14b1aSNick Terrell ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
681*2aa14b1aSNick Terrell return newReps;
682*2aa14b1aSNick Terrell }
683*2aa14b1aSNick Terrell
684e0c1b49fSNick Terrell
685e0c1b49fSNick Terrell /*-*************************************
686e0c1b49fSNick Terrell * Match length counter
687e0c1b49fSNick Terrell ***************************************/
ZSTD_NbCommonBytes(size_t val)688e0c1b49fSNick Terrell static unsigned ZSTD_NbCommonBytes (size_t val)
689e0c1b49fSNick Terrell {
690e0c1b49fSNick Terrell if (MEM_isLittleEndian()) {
691e0c1b49fSNick Terrell if (MEM_64bits()) {
692e0c1b49fSNick Terrell # if (__GNUC__ >= 4)
693e0c1b49fSNick Terrell return (__builtin_ctzll((U64)val) >> 3);
694e0c1b49fSNick Terrell # else
695e0c1b49fSNick Terrell static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
696e0c1b49fSNick Terrell 0, 3, 1, 3, 1, 4, 2, 7,
697e0c1b49fSNick Terrell 0, 2, 3, 6, 1, 5, 3, 5,
698e0c1b49fSNick Terrell 1, 3, 4, 4, 2, 5, 6, 7,
699e0c1b49fSNick Terrell 7, 0, 1, 2, 3, 3, 4, 6,
700e0c1b49fSNick Terrell 2, 6, 5, 5, 3, 4, 5, 6,
701e0c1b49fSNick Terrell 7, 1, 2, 4, 6, 4, 4, 5,
702e0c1b49fSNick Terrell 7, 2, 6, 5, 7, 6, 7, 7 };
703e0c1b49fSNick Terrell return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
704e0c1b49fSNick Terrell # endif
705e0c1b49fSNick Terrell } else { /* 32 bits */
706e0c1b49fSNick Terrell # if (__GNUC__ >= 3)
707e0c1b49fSNick Terrell return (__builtin_ctz((U32)val) >> 3);
708e0c1b49fSNick Terrell # else
709e0c1b49fSNick Terrell static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
710e0c1b49fSNick Terrell 3, 2, 2, 1, 3, 2, 0, 1,
711e0c1b49fSNick Terrell 3, 3, 1, 2, 2, 2, 2, 0,
712e0c1b49fSNick Terrell 3, 1, 2, 0, 1, 0, 1, 1 };
713e0c1b49fSNick Terrell return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
714e0c1b49fSNick Terrell # endif
715e0c1b49fSNick Terrell }
716e0c1b49fSNick Terrell } else { /* Big Endian CPU */
717e0c1b49fSNick Terrell if (MEM_64bits()) {
718e0c1b49fSNick Terrell # if (__GNUC__ >= 4)
719e0c1b49fSNick Terrell return (__builtin_clzll(val) >> 3);
720e0c1b49fSNick Terrell # else
721e0c1b49fSNick Terrell unsigned r;
722e0c1b49fSNick Terrell const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
723e0c1b49fSNick Terrell if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
724e0c1b49fSNick Terrell if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
725e0c1b49fSNick Terrell r += (!val);
726e0c1b49fSNick Terrell return r;
727e0c1b49fSNick Terrell # endif
728e0c1b49fSNick Terrell } else { /* 32 bits */
729e0c1b49fSNick Terrell # if (__GNUC__ >= 3)
730e0c1b49fSNick Terrell return (__builtin_clz((U32)val) >> 3);
731e0c1b49fSNick Terrell # else
732e0c1b49fSNick Terrell unsigned r;
733e0c1b49fSNick Terrell if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
734e0c1b49fSNick Terrell r += (!val);
735e0c1b49fSNick Terrell return r;
736e0c1b49fSNick Terrell # endif
737e0c1b49fSNick Terrell } }
738e0c1b49fSNick Terrell }
739e0c1b49fSNick Terrell
740e0c1b49fSNick Terrell
ZSTD_count(const BYTE * pIn,const BYTE * pMatch,const BYTE * const pInLimit)741e0c1b49fSNick Terrell MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
742e0c1b49fSNick Terrell {
743e0c1b49fSNick Terrell const BYTE* const pStart = pIn;
744e0c1b49fSNick Terrell const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
745e0c1b49fSNick Terrell
746e0c1b49fSNick Terrell if (pIn < pInLoopLimit) {
747e0c1b49fSNick Terrell { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
748e0c1b49fSNick Terrell if (diff) return ZSTD_NbCommonBytes(diff); }
749e0c1b49fSNick Terrell pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
750e0c1b49fSNick Terrell while (pIn < pInLoopLimit) {
751e0c1b49fSNick Terrell size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
752e0c1b49fSNick Terrell if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
753e0c1b49fSNick Terrell pIn += ZSTD_NbCommonBytes(diff);
754e0c1b49fSNick Terrell return (size_t)(pIn - pStart);
755e0c1b49fSNick Terrell } }
756e0c1b49fSNick Terrell if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
757e0c1b49fSNick Terrell if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
758e0c1b49fSNick Terrell if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
759e0c1b49fSNick Terrell return (size_t)(pIn - pStart);
760e0c1b49fSNick Terrell }
761e0c1b49fSNick Terrell
762e0c1b49fSNick Terrell /* ZSTD_count_2segments() :
763e0c1b49fSNick Terrell * can count match length with `ip` & `match` in 2 different segments.
764e0c1b49fSNick Terrell * convention : on reaching mEnd, match count continue starting from iStart
765e0c1b49fSNick Terrell */
766e0c1b49fSNick Terrell MEM_STATIC size_t
ZSTD_count_2segments(const BYTE * ip,const BYTE * match,const BYTE * iEnd,const BYTE * mEnd,const BYTE * iStart)767e0c1b49fSNick Terrell ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
768e0c1b49fSNick Terrell const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
769e0c1b49fSNick Terrell {
770e0c1b49fSNick Terrell const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
771e0c1b49fSNick Terrell size_t const matchLength = ZSTD_count(ip, match, vEnd);
772e0c1b49fSNick Terrell if (match + matchLength != mEnd) return matchLength;
773e0c1b49fSNick Terrell DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
774e0c1b49fSNick Terrell DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
775e0c1b49fSNick Terrell DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
776e0c1b49fSNick Terrell DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
777e0c1b49fSNick Terrell DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
778e0c1b49fSNick Terrell return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
779e0c1b49fSNick Terrell }
780e0c1b49fSNick Terrell
781e0c1b49fSNick Terrell
782e0c1b49fSNick Terrell /*-*************************************
783e0c1b49fSNick Terrell * Hashes
784e0c1b49fSNick Terrell ***************************************/
785e0c1b49fSNick Terrell static const U32 prime3bytes = 506832829U;
ZSTD_hash3(U32 u,U32 h)786e0c1b49fSNick Terrell static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
ZSTD_hash3Ptr(const void * ptr,U32 h)787e0c1b49fSNick Terrell MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
788e0c1b49fSNick Terrell
789e0c1b49fSNick Terrell static const U32 prime4bytes = 2654435761U;
ZSTD_hash4(U32 u,U32 h)790e0c1b49fSNick Terrell static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
ZSTD_hash4Ptr(const void * ptr,U32 h)791e0c1b49fSNick Terrell static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
792e0c1b49fSNick Terrell
793e0c1b49fSNick Terrell static const U64 prime5bytes = 889523592379ULL;
ZSTD_hash5(U64 u,U32 h)794e0c1b49fSNick Terrell static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
ZSTD_hash5Ptr(const void * p,U32 h)795e0c1b49fSNick Terrell static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
796e0c1b49fSNick Terrell
797e0c1b49fSNick Terrell static const U64 prime6bytes = 227718039650203ULL;
ZSTD_hash6(U64 u,U32 h)798e0c1b49fSNick Terrell static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
ZSTD_hash6Ptr(const void * p,U32 h)799e0c1b49fSNick Terrell static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
800e0c1b49fSNick Terrell
801e0c1b49fSNick Terrell static const U64 prime7bytes = 58295818150454627ULL;
ZSTD_hash7(U64 u,U32 h)802e0c1b49fSNick Terrell static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
ZSTD_hash7Ptr(const void * p,U32 h)803e0c1b49fSNick Terrell static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
804e0c1b49fSNick Terrell
805e0c1b49fSNick Terrell static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
ZSTD_hash8(U64 u,U32 h)806e0c1b49fSNick Terrell static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
ZSTD_hash8Ptr(const void * p,U32 h)807e0c1b49fSNick Terrell static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
808e0c1b49fSNick Terrell
809e0c1b49fSNick Terrell MEM_STATIC FORCE_INLINE_ATTR
ZSTD_hashPtr(const void * p,U32 hBits,U32 mls)810e0c1b49fSNick Terrell size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
811e0c1b49fSNick Terrell {
812e0c1b49fSNick Terrell switch(mls)
813e0c1b49fSNick Terrell {
814e0c1b49fSNick Terrell default:
815e0c1b49fSNick Terrell case 4: return ZSTD_hash4Ptr(p, hBits);
816e0c1b49fSNick Terrell case 5: return ZSTD_hash5Ptr(p, hBits);
817e0c1b49fSNick Terrell case 6: return ZSTD_hash6Ptr(p, hBits);
818e0c1b49fSNick Terrell case 7: return ZSTD_hash7Ptr(p, hBits);
819e0c1b49fSNick Terrell case 8: return ZSTD_hash8Ptr(p, hBits);
820e0c1b49fSNick Terrell }
821e0c1b49fSNick Terrell }
822e0c1b49fSNick Terrell
823e0c1b49fSNick Terrell /* ZSTD_ipow() :
824e0c1b49fSNick Terrell * Return base^exponent.
825e0c1b49fSNick Terrell */
ZSTD_ipow(U64 base,U64 exponent)826e0c1b49fSNick Terrell static U64 ZSTD_ipow(U64 base, U64 exponent)
827e0c1b49fSNick Terrell {
828e0c1b49fSNick Terrell U64 power = 1;
829e0c1b49fSNick Terrell while (exponent) {
830e0c1b49fSNick Terrell if (exponent & 1) power *= base;
831e0c1b49fSNick Terrell exponent >>= 1;
832e0c1b49fSNick Terrell base *= base;
833e0c1b49fSNick Terrell }
834e0c1b49fSNick Terrell return power;
835e0c1b49fSNick Terrell }
836e0c1b49fSNick Terrell
837e0c1b49fSNick Terrell #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
838e0c1b49fSNick Terrell
839e0c1b49fSNick Terrell /* ZSTD_rollingHash_append() :
840e0c1b49fSNick Terrell * Add the buffer to the hash value.
841e0c1b49fSNick Terrell */
ZSTD_rollingHash_append(U64 hash,void const * buf,size_t size)842e0c1b49fSNick Terrell static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
843e0c1b49fSNick Terrell {
844e0c1b49fSNick Terrell BYTE const* istart = (BYTE const*)buf;
845e0c1b49fSNick Terrell size_t pos;
846e0c1b49fSNick Terrell for (pos = 0; pos < size; ++pos) {
847e0c1b49fSNick Terrell hash *= prime8bytes;
848e0c1b49fSNick Terrell hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
849e0c1b49fSNick Terrell }
850e0c1b49fSNick Terrell return hash;
851e0c1b49fSNick Terrell }
852e0c1b49fSNick Terrell
853e0c1b49fSNick Terrell /* ZSTD_rollingHash_compute() :
854e0c1b49fSNick Terrell * Compute the rolling hash value of the buffer.
855e0c1b49fSNick Terrell */
ZSTD_rollingHash_compute(void const * buf,size_t size)856e0c1b49fSNick Terrell MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
857e0c1b49fSNick Terrell {
858e0c1b49fSNick Terrell return ZSTD_rollingHash_append(0, buf, size);
859e0c1b49fSNick Terrell }
860e0c1b49fSNick Terrell
861e0c1b49fSNick Terrell /* ZSTD_rollingHash_primePower() :
862e0c1b49fSNick Terrell * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
863e0c1b49fSNick Terrell * over a window of length bytes.
864e0c1b49fSNick Terrell */
ZSTD_rollingHash_primePower(U32 length)865e0c1b49fSNick Terrell MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
866e0c1b49fSNick Terrell {
867e0c1b49fSNick Terrell return ZSTD_ipow(prime8bytes, length - 1);
868e0c1b49fSNick Terrell }
869e0c1b49fSNick Terrell
870e0c1b49fSNick Terrell /* ZSTD_rollingHash_rotate() :
871e0c1b49fSNick Terrell * Rotate the rolling hash by one byte.
872e0c1b49fSNick Terrell */
ZSTD_rollingHash_rotate(U64 hash,BYTE toRemove,BYTE toAdd,U64 primePower)873e0c1b49fSNick Terrell MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
874e0c1b49fSNick Terrell {
875e0c1b49fSNick Terrell hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
876e0c1b49fSNick Terrell hash *= prime8bytes;
877e0c1b49fSNick Terrell hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
878e0c1b49fSNick Terrell return hash;
879e0c1b49fSNick Terrell }
880e0c1b49fSNick Terrell
881e0c1b49fSNick Terrell /*-*************************************
882e0c1b49fSNick Terrell * Round buffer management
883e0c1b49fSNick Terrell ***************************************/
884e0c1b49fSNick Terrell #if (ZSTD_WINDOWLOG_MAX_64 > 31)
885e0c1b49fSNick Terrell # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
886e0c1b49fSNick Terrell #endif
887e0c1b49fSNick Terrell /* Max current allowed */
888e0c1b49fSNick Terrell #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
889e0c1b49fSNick Terrell /* Maximum chunk size before overflow correction needs to be called again */
890e0c1b49fSNick Terrell #define ZSTD_CHUNKSIZE_MAX \
891e0c1b49fSNick Terrell ( ((U32)-1) /* Maximum ending current index */ \
892e0c1b49fSNick Terrell - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */
893e0c1b49fSNick Terrell
894e0c1b49fSNick Terrell /*
895e0c1b49fSNick Terrell * ZSTD_window_clear():
896e0c1b49fSNick Terrell * Clears the window containing the history by simply setting it to empty.
897e0c1b49fSNick Terrell */
ZSTD_window_clear(ZSTD_window_t * window)898e0c1b49fSNick Terrell MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
899e0c1b49fSNick Terrell {
900e0c1b49fSNick Terrell size_t const endT = (size_t)(window->nextSrc - window->base);
901e0c1b49fSNick Terrell U32 const end = (U32)endT;
902e0c1b49fSNick Terrell
903e0c1b49fSNick Terrell window->lowLimit = end;
904e0c1b49fSNick Terrell window->dictLimit = end;
905e0c1b49fSNick Terrell }
906e0c1b49fSNick Terrell
ZSTD_window_isEmpty(ZSTD_window_t const window)907*2aa14b1aSNick Terrell MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
908*2aa14b1aSNick Terrell {
909*2aa14b1aSNick Terrell return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
910*2aa14b1aSNick Terrell window.lowLimit == ZSTD_WINDOW_START_INDEX &&
911*2aa14b1aSNick Terrell (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
912*2aa14b1aSNick Terrell }
913*2aa14b1aSNick Terrell
914e0c1b49fSNick Terrell /*
915e0c1b49fSNick Terrell * ZSTD_window_hasExtDict():
916e0c1b49fSNick Terrell * Returns non-zero if the window has a non-empty extDict.
917e0c1b49fSNick Terrell */
ZSTD_window_hasExtDict(ZSTD_window_t const window)918e0c1b49fSNick Terrell MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
919e0c1b49fSNick Terrell {
920e0c1b49fSNick Terrell return window.lowLimit < window.dictLimit;
921e0c1b49fSNick Terrell }
922e0c1b49fSNick Terrell
923e0c1b49fSNick Terrell /*
924e0c1b49fSNick Terrell * ZSTD_matchState_dictMode():
925e0c1b49fSNick Terrell * Inspects the provided matchState and figures out what dictMode should be
926e0c1b49fSNick Terrell * passed to the compressor.
927e0c1b49fSNick Terrell */
ZSTD_matchState_dictMode(const ZSTD_matchState_t * ms)928e0c1b49fSNick Terrell MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
929e0c1b49fSNick Terrell {
930e0c1b49fSNick Terrell return ZSTD_window_hasExtDict(ms->window) ?
931e0c1b49fSNick Terrell ZSTD_extDict :
932e0c1b49fSNick Terrell ms->dictMatchState != NULL ?
933e0c1b49fSNick Terrell (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
934e0c1b49fSNick Terrell ZSTD_noDict;
935e0c1b49fSNick Terrell }
936e0c1b49fSNick Terrell
937*2aa14b1aSNick Terrell /* Defining this macro to non-zero tells zstd to run the overflow correction
938*2aa14b1aSNick Terrell * code much more frequently. This is very inefficient, and should only be
939*2aa14b1aSNick Terrell * used for tests and fuzzers.
940*2aa14b1aSNick Terrell */
941*2aa14b1aSNick Terrell #ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
942*2aa14b1aSNick Terrell # ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
943*2aa14b1aSNick Terrell # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
944*2aa14b1aSNick Terrell # else
945*2aa14b1aSNick Terrell # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
946*2aa14b1aSNick Terrell # endif
947*2aa14b1aSNick Terrell #endif
948*2aa14b1aSNick Terrell
949*2aa14b1aSNick Terrell /*
950*2aa14b1aSNick Terrell * ZSTD_window_canOverflowCorrect():
951*2aa14b1aSNick Terrell * Returns non-zero if the indices are large enough for overflow correction
952*2aa14b1aSNick Terrell * to work correctly without impacting compression ratio.
953*2aa14b1aSNick Terrell */
ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,U32 cycleLog,U32 maxDist,U32 loadedDictEnd,void const * src)954*2aa14b1aSNick Terrell MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
955*2aa14b1aSNick Terrell U32 cycleLog,
956*2aa14b1aSNick Terrell U32 maxDist,
957*2aa14b1aSNick Terrell U32 loadedDictEnd,
958*2aa14b1aSNick Terrell void const* src)
959*2aa14b1aSNick Terrell {
960*2aa14b1aSNick Terrell U32 const cycleSize = 1u << cycleLog;
961*2aa14b1aSNick Terrell U32 const curr = (U32)((BYTE const*)src - window.base);
962*2aa14b1aSNick Terrell U32 const minIndexToOverflowCorrect = cycleSize
963*2aa14b1aSNick Terrell + MAX(maxDist, cycleSize)
964*2aa14b1aSNick Terrell + ZSTD_WINDOW_START_INDEX;
965*2aa14b1aSNick Terrell
966*2aa14b1aSNick Terrell /* Adjust the min index to backoff the overflow correction frequency,
967*2aa14b1aSNick Terrell * so we don't waste too much CPU in overflow correction. If this
968*2aa14b1aSNick Terrell * computation overflows we don't really care, we just need to make
969*2aa14b1aSNick Terrell * sure it is at least minIndexToOverflowCorrect.
970*2aa14b1aSNick Terrell */
971*2aa14b1aSNick Terrell U32 const adjustment = window.nbOverflowCorrections + 1;
972*2aa14b1aSNick Terrell U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
973*2aa14b1aSNick Terrell minIndexToOverflowCorrect);
974*2aa14b1aSNick Terrell U32 const indexLargeEnough = curr > adjustedIndex;
975*2aa14b1aSNick Terrell
976*2aa14b1aSNick Terrell /* Only overflow correct early if the dictionary is invalidated already,
977*2aa14b1aSNick Terrell * so we don't hurt compression ratio.
978*2aa14b1aSNick Terrell */
979*2aa14b1aSNick Terrell U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
980*2aa14b1aSNick Terrell
981*2aa14b1aSNick Terrell return indexLargeEnough && dictionaryInvalidated;
982*2aa14b1aSNick Terrell }
983*2aa14b1aSNick Terrell
984e0c1b49fSNick Terrell /*
985e0c1b49fSNick Terrell * ZSTD_window_needOverflowCorrection():
986e0c1b49fSNick Terrell * Returns non-zero if the indices are getting too large and need overflow
987e0c1b49fSNick Terrell * protection.
988e0c1b49fSNick Terrell */
ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,U32 cycleLog,U32 maxDist,U32 loadedDictEnd,void const * src,void const * srcEnd)989e0c1b49fSNick Terrell MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
990*2aa14b1aSNick Terrell U32 cycleLog,
991*2aa14b1aSNick Terrell U32 maxDist,
992*2aa14b1aSNick Terrell U32 loadedDictEnd,
993*2aa14b1aSNick Terrell void const* src,
994e0c1b49fSNick Terrell void const* srcEnd)
995e0c1b49fSNick Terrell {
996e0c1b49fSNick Terrell U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
997*2aa14b1aSNick Terrell if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
998*2aa14b1aSNick Terrell if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
999*2aa14b1aSNick Terrell return 1;
1000*2aa14b1aSNick Terrell }
1001*2aa14b1aSNick Terrell }
1002e0c1b49fSNick Terrell return curr > ZSTD_CURRENT_MAX;
1003e0c1b49fSNick Terrell }
1004e0c1b49fSNick Terrell
1005e0c1b49fSNick Terrell /*
1006e0c1b49fSNick Terrell * ZSTD_window_correctOverflow():
1007e0c1b49fSNick Terrell * Reduces the indices to protect from index overflow.
1008e0c1b49fSNick Terrell * Returns the correction made to the indices, which must be applied to every
1009e0c1b49fSNick Terrell * stored index.
1010e0c1b49fSNick Terrell *
1011e0c1b49fSNick Terrell * The least significant cycleLog bits of the indices must remain the same,
1012e0c1b49fSNick Terrell * which may be 0. Every index up to maxDist in the past must be valid.
1013e0c1b49fSNick Terrell */
ZSTD_window_correctOverflow(ZSTD_window_t * window,U32 cycleLog,U32 maxDist,void const * src)1014e0c1b49fSNick Terrell MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
1015e0c1b49fSNick Terrell U32 maxDist, void const* src)
1016e0c1b49fSNick Terrell {
1017e0c1b49fSNick Terrell /* preemptive overflow correction:
1018e0c1b49fSNick Terrell * 1. correction is large enough:
1019e0c1b49fSNick Terrell * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
1020e0c1b49fSNick Terrell * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
1021e0c1b49fSNick Terrell *
1022e0c1b49fSNick Terrell * current - newCurrent
1023e0c1b49fSNick Terrell * > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
1024e0c1b49fSNick Terrell * > (3<<29) - (1<<chainLog)
1025e0c1b49fSNick Terrell * > (3<<29) - (1<<30) (NOTE: chainLog <= 30)
1026e0c1b49fSNick Terrell * > 1<<29
1027e0c1b49fSNick Terrell *
1028e0c1b49fSNick Terrell * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
1029e0c1b49fSNick Terrell * After correction, current is less than (1<<chainLog + 1<<windowLog).
1030e0c1b49fSNick Terrell * In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
1031e0c1b49fSNick Terrell * In 32-bit mode we are safe, because (chainLog <= 29), so
1032e0c1b49fSNick Terrell * ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
1033e0c1b49fSNick Terrell * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
1034e0c1b49fSNick Terrell * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
1035e0c1b49fSNick Terrell */
1036*2aa14b1aSNick Terrell U32 const cycleSize = 1u << cycleLog;
1037*2aa14b1aSNick Terrell U32 const cycleMask = cycleSize - 1;
1038e0c1b49fSNick Terrell U32 const curr = (U32)((BYTE const*)src - window->base);
1039*2aa14b1aSNick Terrell U32 const currentCycle = curr & cycleMask;
1040*2aa14b1aSNick Terrell /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
1041*2aa14b1aSNick Terrell U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
1042*2aa14b1aSNick Terrell ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
1043*2aa14b1aSNick Terrell : 0;
1044*2aa14b1aSNick Terrell U32 const newCurrent = currentCycle
1045*2aa14b1aSNick Terrell + currentCycleCorrection
1046*2aa14b1aSNick Terrell + MAX(maxDist, cycleSize);
1047e0c1b49fSNick Terrell U32 const correction = curr - newCurrent;
1048*2aa14b1aSNick Terrell /* maxDist must be a power of two so that:
1049*2aa14b1aSNick Terrell * (newCurrent & cycleMask) == (curr & cycleMask)
1050*2aa14b1aSNick Terrell * This is required to not corrupt the chains / binary tree.
1051*2aa14b1aSNick Terrell */
1052*2aa14b1aSNick Terrell assert((maxDist & (maxDist - 1)) == 0);
1053*2aa14b1aSNick Terrell assert((curr & cycleMask) == (newCurrent & cycleMask));
1054e0c1b49fSNick Terrell assert(curr > newCurrent);
1055*2aa14b1aSNick Terrell if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1056e0c1b49fSNick Terrell /* Loose bound, should be around 1<<29 (see above) */
1057e0c1b49fSNick Terrell assert(correction > 1<<28);
1058*2aa14b1aSNick Terrell }
1059e0c1b49fSNick Terrell
1060e0c1b49fSNick Terrell window->base += correction;
1061e0c1b49fSNick Terrell window->dictBase += correction;
1062*2aa14b1aSNick Terrell if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
1063*2aa14b1aSNick Terrell window->lowLimit = ZSTD_WINDOW_START_INDEX;
1064*2aa14b1aSNick Terrell } else {
1065*2aa14b1aSNick Terrell window->lowLimit -= correction;
1066*2aa14b1aSNick Terrell }
1067*2aa14b1aSNick Terrell if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
1068*2aa14b1aSNick Terrell window->dictLimit = ZSTD_WINDOW_START_INDEX;
1069*2aa14b1aSNick Terrell } else {
1070*2aa14b1aSNick Terrell window->dictLimit -= correction;
1071*2aa14b1aSNick Terrell }
1072e0c1b49fSNick Terrell
1073e0c1b49fSNick Terrell /* Ensure we can still reference the full window. */
1074e0c1b49fSNick Terrell assert(newCurrent >= maxDist);
1075*2aa14b1aSNick Terrell assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
1076e0c1b49fSNick Terrell /* Ensure that lowLimit and dictLimit didn't underflow. */
1077e0c1b49fSNick Terrell assert(window->lowLimit <= newCurrent);
1078e0c1b49fSNick Terrell assert(window->dictLimit <= newCurrent);
1079e0c1b49fSNick Terrell
1080*2aa14b1aSNick Terrell ++window->nbOverflowCorrections;
1081*2aa14b1aSNick Terrell
1082e0c1b49fSNick Terrell DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
1083e0c1b49fSNick Terrell window->lowLimit);
1084e0c1b49fSNick Terrell return correction;
1085e0c1b49fSNick Terrell }
1086e0c1b49fSNick Terrell
1087e0c1b49fSNick Terrell /*
1088e0c1b49fSNick Terrell * ZSTD_window_enforceMaxDist():
1089e0c1b49fSNick Terrell * Updates lowLimit so that:
1090e0c1b49fSNick Terrell * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
1091e0c1b49fSNick Terrell *
1092e0c1b49fSNick Terrell * It ensures index is valid as long as index >= lowLimit.
1093e0c1b49fSNick Terrell * This must be called before a block compression call.
1094e0c1b49fSNick Terrell *
1095e0c1b49fSNick Terrell * loadedDictEnd is only defined if a dictionary is in use for current compression.
1096e0c1b49fSNick Terrell * As the name implies, loadedDictEnd represents the index at end of dictionary.
1097e0c1b49fSNick Terrell * The value lies within context's referential, it can be directly compared to blockEndIdx.
1098e0c1b49fSNick Terrell *
1099e0c1b49fSNick Terrell * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
1100e0c1b49fSNick Terrell * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
1101e0c1b49fSNick Terrell * This is because dictionaries are allowed to be referenced fully
1102e0c1b49fSNick Terrell * as long as the last byte of the dictionary is in the window.
1103e0c1b49fSNick Terrell * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
1104e0c1b49fSNick Terrell *
1105e0c1b49fSNick Terrell * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
1106e0c1b49fSNick Terrell * In dictMatchState mode, lowLimit and dictLimit are the same,
1107e0c1b49fSNick Terrell * and the dictionary is below them.
1108e0c1b49fSNick Terrell * forceWindow and dictMatchState are therefore incompatible.
1109e0c1b49fSNick Terrell */
1110e0c1b49fSNick Terrell MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t * window,const void * blockEnd,U32 maxDist,U32 * loadedDictEndPtr,const ZSTD_matchState_t ** dictMatchStatePtr)1111e0c1b49fSNick Terrell ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
1112e0c1b49fSNick Terrell const void* blockEnd,
1113e0c1b49fSNick Terrell U32 maxDist,
1114e0c1b49fSNick Terrell U32* loadedDictEndPtr,
1115e0c1b49fSNick Terrell const ZSTD_matchState_t** dictMatchStatePtr)
1116e0c1b49fSNick Terrell {
1117e0c1b49fSNick Terrell U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1118e0c1b49fSNick Terrell U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
1119e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1120e0c1b49fSNick Terrell (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1121e0c1b49fSNick Terrell
1122e0c1b49fSNick Terrell /* - When there is no dictionary : loadedDictEnd == 0.
1123e0c1b49fSNick Terrell In which case, the test (blockEndIdx > maxDist) is merely to avoid
1124e0c1b49fSNick Terrell overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
1125e0c1b49fSNick Terrell - When there is a standard dictionary :
1126e0c1b49fSNick Terrell Index referential is copied from the dictionary,
1127e0c1b49fSNick Terrell which means it starts from 0.
1128e0c1b49fSNick Terrell In which case, loadedDictEnd == dictSize,
1129e0c1b49fSNick Terrell and it makes sense to compare `blockEndIdx > maxDist + dictSize`
1130e0c1b49fSNick Terrell since `blockEndIdx` also starts from zero.
1131e0c1b49fSNick Terrell - When there is an attached dictionary :
1132e0c1b49fSNick Terrell loadedDictEnd is expressed within the referential of the context,
1133e0c1b49fSNick Terrell so it can be directly compared against blockEndIdx.
1134e0c1b49fSNick Terrell */
1135e0c1b49fSNick Terrell if (blockEndIdx > maxDist + loadedDictEnd) {
1136e0c1b49fSNick Terrell U32 const newLowLimit = blockEndIdx - maxDist;
1137e0c1b49fSNick Terrell if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
1138e0c1b49fSNick Terrell if (window->dictLimit < window->lowLimit) {
1139e0c1b49fSNick Terrell DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
1140e0c1b49fSNick Terrell (unsigned)window->dictLimit, (unsigned)window->lowLimit);
1141e0c1b49fSNick Terrell window->dictLimit = window->lowLimit;
1142e0c1b49fSNick Terrell }
1143e0c1b49fSNick Terrell /* On reaching window size, dictionaries are invalidated */
1144e0c1b49fSNick Terrell if (loadedDictEndPtr) *loadedDictEndPtr = 0;
1145e0c1b49fSNick Terrell if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
1146e0c1b49fSNick Terrell }
1147e0c1b49fSNick Terrell }
1148e0c1b49fSNick Terrell
1149e0c1b49fSNick Terrell /* Similar to ZSTD_window_enforceMaxDist(),
1150e0c1b49fSNick Terrell * but only invalidates dictionary
1151e0c1b49fSNick Terrell * when input progresses beyond window size.
1152e0c1b49fSNick Terrell * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
1153e0c1b49fSNick Terrell * loadedDictEnd uses same referential as window->base
1154e0c1b49fSNick Terrell * maxDist is the window size */
1155e0c1b49fSNick Terrell MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t * window,const void * blockEnd,U32 maxDist,U32 * loadedDictEndPtr,const ZSTD_matchState_t ** dictMatchStatePtr)1156e0c1b49fSNick Terrell ZSTD_checkDictValidity(const ZSTD_window_t* window,
1157e0c1b49fSNick Terrell const void* blockEnd,
1158e0c1b49fSNick Terrell U32 maxDist,
1159e0c1b49fSNick Terrell U32* loadedDictEndPtr,
1160e0c1b49fSNick Terrell const ZSTD_matchState_t** dictMatchStatePtr)
1161e0c1b49fSNick Terrell {
1162e0c1b49fSNick Terrell assert(loadedDictEndPtr != NULL);
1163e0c1b49fSNick Terrell assert(dictMatchStatePtr != NULL);
1164e0c1b49fSNick Terrell { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1165e0c1b49fSNick Terrell U32 const loadedDictEnd = *loadedDictEndPtr;
1166e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1167e0c1b49fSNick Terrell (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1168e0c1b49fSNick Terrell assert(blockEndIdx >= loadedDictEnd);
1169e0c1b49fSNick Terrell
1170e0c1b49fSNick Terrell if (blockEndIdx > loadedDictEnd + maxDist) {
1171e0c1b49fSNick Terrell /* On reaching window size, dictionaries are invalidated.
1172e0c1b49fSNick Terrell * For simplification, if window size is reached anywhere within next block,
1173e0c1b49fSNick Terrell * the dictionary is invalidated for the full block.
1174e0c1b49fSNick Terrell */
1175e0c1b49fSNick Terrell DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
1176e0c1b49fSNick Terrell *loadedDictEndPtr = 0;
1177e0c1b49fSNick Terrell *dictMatchStatePtr = NULL;
1178e0c1b49fSNick Terrell } else {
1179e0c1b49fSNick Terrell if (*loadedDictEndPtr != 0) {
1180e0c1b49fSNick Terrell DEBUGLOG(6, "dictionary considered valid for current block");
1181e0c1b49fSNick Terrell } } }
1182e0c1b49fSNick Terrell }
1183e0c1b49fSNick Terrell
ZSTD_window_init(ZSTD_window_t * window)1184e0c1b49fSNick Terrell MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
1185e0c1b49fSNick Terrell ZSTD_memset(window, 0, sizeof(*window));
1186e0c1b49fSNick Terrell window->base = (BYTE const*)" ";
1187e0c1b49fSNick Terrell window->dictBase = (BYTE const*)" ";
1188*2aa14b1aSNick Terrell ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
1189*2aa14b1aSNick Terrell window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */
1190*2aa14b1aSNick Terrell window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */
1191*2aa14b1aSNick Terrell window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */
1192*2aa14b1aSNick Terrell window->nbOverflowCorrections = 0;
1193e0c1b49fSNick Terrell }
1194e0c1b49fSNick Terrell
1195e0c1b49fSNick Terrell /*
1196e0c1b49fSNick Terrell * ZSTD_window_update():
1197e0c1b49fSNick Terrell * Updates the window by appending [src, src + srcSize) to the window.
1198e0c1b49fSNick Terrell * If it is not contiguous, the current prefix becomes the extDict, and we
1199e0c1b49fSNick Terrell * forget about the extDict. Handles overlap of the prefix and extDict.
1200e0c1b49fSNick Terrell * Returns non-zero if the segment is contiguous.
1201e0c1b49fSNick Terrell */
ZSTD_window_update(ZSTD_window_t * window,void const * src,size_t srcSize,int forceNonContiguous)1202e0c1b49fSNick Terrell MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
1203*2aa14b1aSNick Terrell void const* src, size_t srcSize,
1204*2aa14b1aSNick Terrell int forceNonContiguous)
1205e0c1b49fSNick Terrell {
1206e0c1b49fSNick Terrell BYTE const* const ip = (BYTE const*)src;
1207e0c1b49fSNick Terrell U32 contiguous = 1;
1208e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_window_update");
1209e0c1b49fSNick Terrell if (srcSize == 0)
1210e0c1b49fSNick Terrell return contiguous;
1211e0c1b49fSNick Terrell assert(window->base != NULL);
1212e0c1b49fSNick Terrell assert(window->dictBase != NULL);
1213e0c1b49fSNick Terrell /* Check if blocks follow each other */
1214*2aa14b1aSNick Terrell if (src != window->nextSrc || forceNonContiguous) {
1215e0c1b49fSNick Terrell /* not contiguous */
1216e0c1b49fSNick Terrell size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
1217e0c1b49fSNick Terrell DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
1218e0c1b49fSNick Terrell window->lowLimit = window->dictLimit;
1219e0c1b49fSNick Terrell assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
1220e0c1b49fSNick Terrell window->dictLimit = (U32)distanceFromBase;
1221e0c1b49fSNick Terrell window->dictBase = window->base;
1222e0c1b49fSNick Terrell window->base = ip - distanceFromBase;
1223e0c1b49fSNick Terrell /* ms->nextToUpdate = window->dictLimit; */
1224e0c1b49fSNick Terrell if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
1225e0c1b49fSNick Terrell contiguous = 0;
1226e0c1b49fSNick Terrell }
1227e0c1b49fSNick Terrell window->nextSrc = ip + srcSize;
1228e0c1b49fSNick Terrell /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
1229e0c1b49fSNick Terrell if ( (ip+srcSize > window->dictBase + window->lowLimit)
1230e0c1b49fSNick Terrell & (ip < window->dictBase + window->dictLimit)) {
1231e0c1b49fSNick Terrell ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
1232e0c1b49fSNick Terrell U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
1233e0c1b49fSNick Terrell window->lowLimit = lowLimitMax;
1234e0c1b49fSNick Terrell DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
1235e0c1b49fSNick Terrell }
1236e0c1b49fSNick Terrell return contiguous;
1237e0c1b49fSNick Terrell }
1238e0c1b49fSNick Terrell
1239e0c1b49fSNick Terrell /*
1240e0c1b49fSNick Terrell * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
1241e0c1b49fSNick Terrell */
ZSTD_getLowestMatchIndex(const ZSTD_matchState_t * ms,U32 curr,unsigned windowLog)1242e0c1b49fSNick Terrell MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
1243e0c1b49fSNick Terrell {
1244e0c1b49fSNick Terrell U32 const maxDistance = 1U << windowLog;
1245e0c1b49fSNick Terrell U32 const lowestValid = ms->window.lowLimit;
1246e0c1b49fSNick Terrell U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1247e0c1b49fSNick Terrell U32 const isDictionary = (ms->loadedDictEnd != 0);
1248e0c1b49fSNick Terrell /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
1249e0c1b49fSNick Terrell * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
1250e0c1b49fSNick Terrell * valid for the entire block. So this check is sufficient to find the lowest valid match index.
1251e0c1b49fSNick Terrell */
1252e0c1b49fSNick Terrell U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1253e0c1b49fSNick Terrell return matchLowest;
1254e0c1b49fSNick Terrell }
1255e0c1b49fSNick Terrell
1256e0c1b49fSNick Terrell /*
1257e0c1b49fSNick Terrell * Returns the lowest allowed match index in the prefix.
1258e0c1b49fSNick Terrell */
ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t * ms,U32 curr,unsigned windowLog)1259e0c1b49fSNick Terrell MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
1260e0c1b49fSNick Terrell {
1261e0c1b49fSNick Terrell U32 const maxDistance = 1U << windowLog;
1262e0c1b49fSNick Terrell U32 const lowestValid = ms->window.dictLimit;
1263e0c1b49fSNick Terrell U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1264e0c1b49fSNick Terrell U32 const isDictionary = (ms->loadedDictEnd != 0);
1265e0c1b49fSNick Terrell /* When computing the lowest prefix index we need to take the dictionary into account to handle
1266e0c1b49fSNick Terrell * the edge case where the dictionary and the source are contiguous in memory.
1267e0c1b49fSNick Terrell */
1268e0c1b49fSNick Terrell U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1269e0c1b49fSNick Terrell return matchLowest;
1270e0c1b49fSNick Terrell }
1271e0c1b49fSNick Terrell
1272e0c1b49fSNick Terrell
1273e0c1b49fSNick Terrell
1274e0c1b49fSNick Terrell /* debug functions */
1275e0c1b49fSNick Terrell #if (DEBUGLEVEL>=2)
1276e0c1b49fSNick Terrell
ZSTD_fWeight(U32 rawStat)1277e0c1b49fSNick Terrell MEM_STATIC double ZSTD_fWeight(U32 rawStat)
1278e0c1b49fSNick Terrell {
1279e0c1b49fSNick Terrell U32 const fp_accuracy = 8;
1280e0c1b49fSNick Terrell U32 const fp_multiplier = (1 << fp_accuracy);
1281e0c1b49fSNick Terrell U32 const newStat = rawStat + 1;
1282e0c1b49fSNick Terrell U32 const hb = ZSTD_highbit32(newStat);
1283e0c1b49fSNick Terrell U32 const BWeight = hb * fp_multiplier;
1284e0c1b49fSNick Terrell U32 const FWeight = (newStat << fp_accuracy) >> hb;
1285e0c1b49fSNick Terrell U32 const weight = BWeight + FWeight;
1286e0c1b49fSNick Terrell assert(hb + fp_accuracy < 31);
1287e0c1b49fSNick Terrell return (double)weight / fp_multiplier;
1288e0c1b49fSNick Terrell }
1289e0c1b49fSNick Terrell
1290e0c1b49fSNick Terrell /* display a table content,
1291e0c1b49fSNick Terrell * listing each element, its frequency, and its predicted bit cost */
ZSTD_debugTable(const U32 * table,U32 max)1292e0c1b49fSNick Terrell MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
1293e0c1b49fSNick Terrell {
1294e0c1b49fSNick Terrell unsigned u, sum;
1295e0c1b49fSNick Terrell for (u=0, sum=0; u<=max; u++) sum += table[u];
1296e0c1b49fSNick Terrell DEBUGLOG(2, "total nb elts: %u", sum);
1297e0c1b49fSNick Terrell for (u=0; u<=max; u++) {
1298e0c1b49fSNick Terrell DEBUGLOG(2, "%2u: %5u (%.2f)",
1299e0c1b49fSNick Terrell u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
1300e0c1b49fSNick Terrell }
1301e0c1b49fSNick Terrell }
1302e0c1b49fSNick Terrell
1303e0c1b49fSNick Terrell #endif
1304e0c1b49fSNick Terrell
1305e0c1b49fSNick Terrell
1306e0c1b49fSNick Terrell
1307e0c1b49fSNick Terrell /* ===============================================================
1308e0c1b49fSNick Terrell * Shared internal declarations
1309e0c1b49fSNick Terrell * These prototypes may be called from sources not in lib/compress
1310e0c1b49fSNick Terrell * =============================================================== */
1311e0c1b49fSNick Terrell
1312e0c1b49fSNick Terrell /* ZSTD_loadCEntropy() :
1313e0c1b49fSNick Terrell * dict : must point at beginning of a valid zstd dictionary.
1314e0c1b49fSNick Terrell * return : size of dictionary header (size of magic number + dict ID + entropy tables)
1315e0c1b49fSNick Terrell * assumptions : magic number supposed already checked
1316e0c1b49fSNick Terrell * and dictSize >= 8 */
1317e0c1b49fSNick Terrell size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
1318e0c1b49fSNick Terrell const void* const dict, size_t dictSize);
1319e0c1b49fSNick Terrell
1320e0c1b49fSNick Terrell void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
1321e0c1b49fSNick Terrell
1322e0c1b49fSNick Terrell /* ==============================================================
1323e0c1b49fSNick Terrell * Private declarations
1324e0c1b49fSNick Terrell * These prototypes shall only be called from within lib/compress
1325e0c1b49fSNick Terrell * ============================================================== */
1326e0c1b49fSNick Terrell
1327e0c1b49fSNick Terrell /* ZSTD_getCParamsFromCCtxParams() :
1328e0c1b49fSNick Terrell * cParams are built depending on compressionLevel, src size hints,
1329e0c1b49fSNick Terrell * LDM and manually set compression parameters.
1330e0c1b49fSNick Terrell * Note: srcSizeHint == 0 means 0!
1331e0c1b49fSNick Terrell */
1332e0c1b49fSNick Terrell ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
1333e0c1b49fSNick Terrell const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
1334e0c1b49fSNick Terrell
1335e0c1b49fSNick Terrell /*! ZSTD_initCStream_internal() :
1336e0c1b49fSNick Terrell * Private use only. Init streaming operation.
1337e0c1b49fSNick Terrell * expects params to be valid.
1338e0c1b49fSNick Terrell * must receive dict, or cdict, or none, but not both.
1339e0c1b49fSNick Terrell * @return : 0, or an error code */
1340e0c1b49fSNick Terrell size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
1341e0c1b49fSNick Terrell const void* dict, size_t dictSize,
1342e0c1b49fSNick Terrell const ZSTD_CDict* cdict,
1343e0c1b49fSNick Terrell const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
1344e0c1b49fSNick Terrell
1345e0c1b49fSNick Terrell void ZSTD_resetSeqStore(seqStore_t* ssPtr);
1346e0c1b49fSNick Terrell
1347e0c1b49fSNick Terrell /*! ZSTD_getCParamsFromCDict() :
1348e0c1b49fSNick Terrell * as the name implies */
1349e0c1b49fSNick Terrell ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
1350e0c1b49fSNick Terrell
1351e0c1b49fSNick Terrell /* ZSTD_compressBegin_advanced_internal() :
1352e0c1b49fSNick Terrell * Private use only. To be called from zstdmt_compress.c. */
1353e0c1b49fSNick Terrell size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
1354e0c1b49fSNick Terrell const void* dict, size_t dictSize,
1355e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType,
1356e0c1b49fSNick Terrell ZSTD_dictTableLoadMethod_e dtlm,
1357e0c1b49fSNick Terrell const ZSTD_CDict* cdict,
1358e0c1b49fSNick Terrell const ZSTD_CCtx_params* params,
1359e0c1b49fSNick Terrell unsigned long long pledgedSrcSize);
1360e0c1b49fSNick Terrell
1361e0c1b49fSNick Terrell /* ZSTD_compress_advanced_internal() :
1362e0c1b49fSNick Terrell * Private use only. To be called from zstdmt_compress.c. */
1363e0c1b49fSNick Terrell size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
1364e0c1b49fSNick Terrell void* dst, size_t dstCapacity,
1365e0c1b49fSNick Terrell const void* src, size_t srcSize,
1366e0c1b49fSNick Terrell const void* dict,size_t dictSize,
1367e0c1b49fSNick Terrell const ZSTD_CCtx_params* params);
1368e0c1b49fSNick Terrell
1369e0c1b49fSNick Terrell
1370e0c1b49fSNick Terrell /* ZSTD_writeLastEmptyBlock() :
1371e0c1b49fSNick Terrell * output an empty Block with end-of-frame mark to complete a frame
1372e0c1b49fSNick Terrell * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
1373e0c1b49fSNick Terrell * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
1374e0c1b49fSNick Terrell */
1375e0c1b49fSNick Terrell size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
1376e0c1b49fSNick Terrell
1377e0c1b49fSNick Terrell
1378e0c1b49fSNick Terrell /* ZSTD_referenceExternalSequences() :
1379e0c1b49fSNick Terrell * Must be called before starting a compression operation.
1380e0c1b49fSNick Terrell * seqs must parse a prefix of the source.
1381e0c1b49fSNick Terrell * This cannot be used when long range matching is enabled.
1382e0c1b49fSNick Terrell * Zstd will use these sequences, and pass the literals to a secondary block
1383e0c1b49fSNick Terrell * compressor.
1384e0c1b49fSNick Terrell * @return : An error code on failure.
1385e0c1b49fSNick Terrell * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
1386e0c1b49fSNick Terrell * access and data corruption.
1387e0c1b49fSNick Terrell */
1388e0c1b49fSNick Terrell size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
1389e0c1b49fSNick Terrell
1390e0c1b49fSNick Terrell /* ZSTD_cycleLog() :
1391e0c1b49fSNick Terrell * condition for correct operation : hashLog > 1 */
1392e0c1b49fSNick Terrell U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
1393e0c1b49fSNick Terrell
1394e0c1b49fSNick Terrell /* ZSTD_CCtx_trace() :
1395e0c1b49fSNick Terrell * Trace the end of a compression call.
1396e0c1b49fSNick Terrell */
1397e0c1b49fSNick Terrell void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
1398e0c1b49fSNick Terrell
1399e0c1b49fSNick Terrell #endif /* ZSTD_COMPRESS_H */
1400