xref: /openbmc/linux/lib/zstd/compress/zstd_compress_internal.h (revision e65e175b07bef5974045cc42238de99057669ca7)
1 /*
2  * Copyright (c) Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /* This header contains definitions
12  * that shall **only** be used by modules within lib/compress.
13  */
14 
15 #ifndef ZSTD_COMPRESS_H
16 #define ZSTD_COMPRESS_H
17 
18 /*-*************************************
19 *  Dependencies
20 ***************************************/
21 #include "../common/zstd_internal.h"
22 #include "zstd_cwksp.h"
23 
24 
25 /*-*************************************
26 *  Constants
27 ***************************************/
28 #define kSearchStrength      8
29 #define HASH_READ_SIZE       8
30 #define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
31                                        It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
32                                        It's not a big deal though : candidate will just be sorted again.
33                                        Additionally, candidate position 1 will be lost.
34                                        But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
35                                        The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
36                                        This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
37 
38 
39 /*-*************************************
40 *  Context memory management
41 ***************************************/
42 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
43 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
44 
45 typedef struct ZSTD_prefixDict_s {
46     const void* dict;
47     size_t dictSize;
48     ZSTD_dictContentType_e dictContentType;
49 } ZSTD_prefixDict;
50 
51 typedef struct {
52     void* dictBuffer;
53     void const* dict;
54     size_t dictSize;
55     ZSTD_dictContentType_e dictContentType;
56     ZSTD_CDict* cdict;
57 } ZSTD_localDict;
58 
59 typedef struct {
60     HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
61     HUF_repeat repeatMode;
62 } ZSTD_hufCTables_t;
63 
64 typedef struct {
65     FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
66     FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
67     FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
68     FSE_repeat offcode_repeatMode;
69     FSE_repeat matchlength_repeatMode;
70     FSE_repeat litlength_repeatMode;
71 } ZSTD_fseCTables_t;
72 
73 typedef struct {
74     ZSTD_hufCTables_t huf;
75     ZSTD_fseCTables_t fse;
76 } ZSTD_entropyCTables_t;
77 
78 /* *********************************************
79 *  Entropy buffer statistics structs and funcs *
80 ***********************************************/
81 /* ZSTD_hufCTablesMetadata_t :
82  *  Stores Literals Block Type for a super-block in hType, and
83  *  huffman tree description in hufDesBuffer.
84  *  hufDesSize refers to the size of huffman tree description in bytes.
85  *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
86 typedef struct {
87     symbolEncodingType_e hType;
88     BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
89     size_t hufDesSize;
90 } ZSTD_hufCTablesMetadata_t;
91 
92 /* ZSTD_fseCTablesMetadata_t :
93  *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
94  *  fse tables in fseTablesBuffer.
95  *  fseTablesSize refers to the size of fse tables in bytes.
96  *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
97 typedef struct {
98     symbolEncodingType_e llType;
99     symbolEncodingType_e ofType;
100     symbolEncodingType_e mlType;
101     BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
102     size_t fseTablesSize;
103     size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
104 } ZSTD_fseCTablesMetadata_t;
105 
106 typedef struct {
107     ZSTD_hufCTablesMetadata_t hufMetadata;
108     ZSTD_fseCTablesMetadata_t fseMetadata;
109 } ZSTD_entropyCTablesMetadata_t;
110 
111 /* ZSTD_buildBlockEntropyStats() :
112  *  Builds entropy for the block.
113  *  @return : 0 on success or error code */
114 size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
115                              const ZSTD_entropyCTables_t* prevEntropy,
116                                    ZSTD_entropyCTables_t* nextEntropy,
117                              const ZSTD_CCtx_params* cctxParams,
118                                    ZSTD_entropyCTablesMetadata_t* entropyMetadata,
119                                    void* workspace, size_t wkspSize);
120 
121 /* *******************************
122 *  Compression internals structs *
123 *********************************/
124 
125 typedef struct {
126     U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
127     U32 len;            /* Raw length of match */
128 } ZSTD_match_t;
129 
130 typedef struct {
131     U32 offset;         /* Offset of sequence */
132     U32 litLength;      /* Length of literals prior to match */
133     U32 matchLength;    /* Raw length of match */
134 } rawSeq;
135 
136 typedef struct {
137   rawSeq* seq;          /* The start of the sequences */
138   size_t pos;           /* The index in seq where reading stopped. pos <= size. */
139   size_t posInSequence; /* The position within the sequence at seq[pos] where reading
140                            stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
141   size_t size;          /* The number of sequences. <= capacity. */
142   size_t capacity;      /* The capacity starting from `seq` pointer */
143 } rawSeqStore_t;
144 
145 UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
146 
147 typedef struct {
148     int price;
149     U32 off;
150     U32 mlen;
151     U32 litlen;
152     U32 rep[ZSTD_REP_NUM];
153 } ZSTD_optimal_t;
154 
155 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
156 
157 typedef struct {
158     /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
159     unsigned* litFreq;           /* table of literals statistics, of size 256 */
160     unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
161     unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
162     unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
163     ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
164     ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
165 
166     U32  litSum;                 /* nb of literals */
167     U32  litLengthSum;           /* nb of litLength codes */
168     U32  matchLengthSum;         /* nb of matchLength codes */
169     U32  offCodeSum;             /* nb of offset codes */
170     U32  litSumBasePrice;        /* to compare to log2(litfreq) */
171     U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
172     U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
173     U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
174     ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
175     const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
176     ZSTD_paramSwitch_e literalCompressionMode;
177 } optState_t;
178 
179 typedef struct {
180   ZSTD_entropyCTables_t entropy;
181   U32 rep[ZSTD_REP_NUM];
182 } ZSTD_compressedBlockState_t;
183 
184 typedef struct {
185     BYTE const* nextSrc;       /* next block here to continue on current prefix */
186     BYTE const* base;          /* All regular indexes relative to this position */
187     BYTE const* dictBase;      /* extDict indexes relative to this position */
188     U32 dictLimit;             /* below that point, need extDict */
189     U32 lowLimit;              /* below that point, no more valid data */
190     U32 nbOverflowCorrections; /* Number of times overflow correction has run since
191                                 * ZSTD_window_init(). Useful for debugging coredumps
192                                 * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
193                                 */
194 } ZSTD_window_t;
195 
196 #define ZSTD_WINDOW_START_INDEX 2
197 
198 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
199 
200 #define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
201 
202 struct ZSTD_matchState_t {
203     ZSTD_window_t window;   /* State for window round buffer management */
204     U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
205                              * When loadedDictEnd != 0, a dictionary is in use, and still valid.
206                              * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
207                              * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
208                              * When dict referential is copied into active context (i.e. not attached),
209                              * loadedDictEnd == dictSize, since referential starts from zero.
210                              */
211     U32 nextToUpdate;       /* index from which to continue table update */
212     U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
213 
214     U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
215     U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
216     U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
217 
218     U32* hashTable;
219     U32* hashTable3;
220     U32* chainTable;
221 
222     U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
223 
224     int dedicatedDictSearch;  /* Indicates whether this matchState is using the
225                                * dedicated dictionary search structure.
226                                */
227     optState_t opt;         /* optimal parser state */
228     const ZSTD_matchState_t* dictMatchState;
229     ZSTD_compressionParameters cParams;
230     const rawSeqStore_t* ldmSeqStore;
231 };
232 
233 typedef struct {
234     ZSTD_compressedBlockState_t* prevCBlock;
235     ZSTD_compressedBlockState_t* nextCBlock;
236     ZSTD_matchState_t matchState;
237 } ZSTD_blockState_t;
238 
239 typedef struct {
240     U32 offset;
241     U32 checksum;
242 } ldmEntry_t;
243 
244 typedef struct {
245     BYTE const* split;
246     U32 hash;
247     U32 checksum;
248     ldmEntry_t* bucket;
249 } ldmMatchCandidate_t;
250 
251 #define LDM_BATCH_SIZE 64
252 
253 typedef struct {
254     ZSTD_window_t window;   /* State for the window round buffer management */
255     ldmEntry_t* hashTable;
256     U32 loadedDictEnd;
257     BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
258     size_t splitIndices[LDM_BATCH_SIZE];
259     ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
260 } ldmState_t;
261 
262 typedef struct {
263     ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
264     U32 hashLog;            /* Log size of hashTable */
265     U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
266     U32 minMatchLength;     /* Minimum match length */
267     U32 hashRateLog;       /* Log number of entries to skip */
268     U32 windowLog;          /* Window log for the LDM */
269 } ldmParams_t;
270 
271 typedef struct {
272     int collectSequences;
273     ZSTD_Sequence* seqStart;
274     size_t seqIndex;
275     size_t maxSequences;
276 } SeqCollector;
277 
278 struct ZSTD_CCtx_params_s {
279     ZSTD_format_e format;
280     ZSTD_compressionParameters cParams;
281     ZSTD_frameParameters fParams;
282 
283     int compressionLevel;
284     int forceWindow;           /* force back-references to respect limit of
285                                 * 1<<wLog, even for dictionary */
286     size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
287                                 * No target when targetCBlockSize == 0.
288                                 * There is no guarantee on compressed block size */
289     int srcSizeHint;           /* User's best guess of source size.
290                                 * Hint is not valid when srcSizeHint == 0.
291                                 * There is no guarantee that hint is close to actual source size */
292 
293     ZSTD_dictAttachPref_e attachDictPref;
294     ZSTD_paramSwitch_e literalCompressionMode;
295 
296     /* Multithreading: used to pass parameters to mtctx */
297     int nbWorkers;
298     size_t jobSize;
299     int overlapLog;
300     int rsyncable;
301 
302     /* Long distance matching parameters */
303     ldmParams_t ldmParams;
304 
305     /* Dedicated dict search algorithm trigger */
306     int enableDedicatedDictSearch;
307 
308     /* Input/output buffer modes */
309     ZSTD_bufferMode_e inBufferMode;
310     ZSTD_bufferMode_e outBufferMode;
311 
312     /* Sequence compression API */
313     ZSTD_sequenceFormat_e blockDelimiters;
314     int validateSequences;
315 
316     /* Block splitting */
317     ZSTD_paramSwitch_e useBlockSplitter;
318 
319     /* Param for deciding whether to use row-based matchfinder */
320     ZSTD_paramSwitch_e useRowMatchFinder;
321 
322     /* Always load a dictionary in ext-dict mode (not prefix mode)? */
323     int deterministicRefPrefix;
324 
325     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
326     ZSTD_customMem customMem;
327 };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
328 
329 #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
330 #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
331 
332 /*
333  * Indicates whether this compression proceeds directly from user-provided
334  * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
335  * whether the context needs to buffer the input/output (ZSTDb_buffered).
336  */
337 typedef enum {
338     ZSTDb_not_buffered,
339     ZSTDb_buffered
340 } ZSTD_buffered_policy_e;
341 
342 /*
343  * Struct that contains all elements of block splitter that should be allocated
344  * in a wksp.
345  */
346 #define ZSTD_MAX_NB_BLOCK_SPLITS 196
347 typedef struct {
348     seqStore_t fullSeqStoreChunk;
349     seqStore_t firstHalfSeqStore;
350     seqStore_t secondHalfSeqStore;
351     seqStore_t currSeqStore;
352     seqStore_t nextSeqStore;
353 
354     U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
355     ZSTD_entropyCTablesMetadata_t entropyMetadata;
356 } ZSTD_blockSplitCtx;
357 
358 struct ZSTD_CCtx_s {
359     ZSTD_compressionStage_e stage;
360     int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
361     int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
362     ZSTD_CCtx_params requestedParams;
363     ZSTD_CCtx_params appliedParams;
364     ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
365     U32   dictID;
366     size_t dictContentSize;
367 
368     ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
369     size_t blockSize;
370     unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
371     unsigned long long consumedSrcSize;
372     unsigned long long producedCSize;
373     struct xxh64_state xxhState;
374     ZSTD_customMem customMem;
375     ZSTD_threadPool* pool;
376     size_t staticSize;
377     SeqCollector seqCollector;
378     int isFirstBlock;
379     int initialized;
380 
381     seqStore_t seqStore;      /* sequences storage ptrs */
382     ldmState_t ldmState;      /* long distance matching state */
383     rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
384     size_t maxNbLdmSequences;
385     rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
386     ZSTD_blockState_t blockState;
387     U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
388 
389     /* Whether we are streaming or not */
390     ZSTD_buffered_policy_e bufferedPolicy;
391 
392     /* streaming */
393     char*  inBuff;
394     size_t inBuffSize;
395     size_t inToCompress;
396     size_t inBuffPos;
397     size_t inBuffTarget;
398     char*  outBuff;
399     size_t outBuffSize;
400     size_t outBuffContentSize;
401     size_t outBuffFlushedSize;
402     ZSTD_cStreamStage streamStage;
403     U32    frameEnded;
404 
405     /* Stable in/out buffer verification */
406     ZSTD_inBuffer expectedInBuffer;
407     size_t expectedOutBufferSize;
408 
409     /* Dictionary */
410     ZSTD_localDict localDict;
411     const ZSTD_CDict* cdict;
412     ZSTD_prefixDict prefixDict;   /* single-usage dictionary */
413 
414     /* Multi-threading */
415 
416     /* Tracing */
417 
418     /* Workspace for block splitter */
419     ZSTD_blockSplitCtx blockSplitCtx;
420 };
421 
422 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
423 
424 typedef enum {
425     ZSTD_noDict = 0,
426     ZSTD_extDict = 1,
427     ZSTD_dictMatchState = 2,
428     ZSTD_dedicatedDictSearch = 3
429 } ZSTD_dictMode_e;
430 
431 typedef enum {
432     ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
433                                  * In this mode we use both the srcSize and the dictSize
434                                  * when selecting and adjusting parameters.
435                                  */
436     ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
437                                  * In this mode we only take the srcSize into account when selecting
438                                  * and adjusting parameters.
439                                  */
440     ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
441                                  * In this mode we take both the source size and the dictionary size
442                                  * into account when selecting and adjusting the parameters.
443                                  */
444     ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
445                                  * We don't know what these parameters are for. We default to the legacy
446                                  * behavior of taking both the source size and the dict size into account
447                                  * when selecting and adjusting parameters.
448                                  */
449 } ZSTD_cParamMode_e;
450 
451 typedef size_t (*ZSTD_blockCompressor) (
452         ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
453         void const* src, size_t srcSize);
454 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
455 
456 
457 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
458 {
459     static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
460                                        8,  9, 10, 11, 12, 13, 14, 15,
461                                       16, 16, 17, 17, 18, 18, 19, 19,
462                                       20, 20, 20, 20, 21, 21, 21, 21,
463                                       22, 22, 22, 22, 22, 22, 22, 22,
464                                       23, 23, 23, 23, 23, 23, 23, 23,
465                                       24, 24, 24, 24, 24, 24, 24, 24,
466                                       24, 24, 24, 24, 24, 24, 24, 24 };
467     static const U32 LL_deltaCode = 19;
468     return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
469 }
470 
471 /* ZSTD_MLcode() :
472  * note : mlBase = matchLength - MINMATCH;
473  *        because it's the format it's stored in seqStore->sequences */
474 MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
475 {
476     static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
477                                       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
478                                       32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
479                                       38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
480                                       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
481                                       41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
482                                       42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
483                                       42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
484     static const U32 ML_deltaCode = 36;
485     return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
486 }
487 
488 /* ZSTD_cParam_withinBounds:
489  * @return 1 if value is within cParam bounds,
490  * 0 otherwise */
491 MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
492 {
493     ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
494     if (ZSTD_isError(bounds.error)) return 0;
495     if (value < bounds.lowerBound) return 0;
496     if (value > bounds.upperBound) return 0;
497     return 1;
498 }
499 
500 /* ZSTD_noCompressBlock() :
501  * Writes uncompressed block to dst buffer from given src.
502  * Returns the size of the block */
503 MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
504 {
505     U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
506     RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
507                     dstSize_tooSmall, "dst buf too small for uncompressed block");
508     MEM_writeLE24(dst, cBlockHeader24);
509     ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
510     return ZSTD_blockHeaderSize + srcSize;
511 }
512 
513 MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
514 {
515     BYTE* const op = (BYTE*)dst;
516     U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
517     RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
518     MEM_writeLE24(op, cBlockHeader);
519     op[3] = src;
520     return 4;
521 }
522 
523 
524 /* ZSTD_minGain() :
525  * minimum compression required
526  * to generate a compress block or a compressed literals section.
527  * note : use same formula for both situations */
528 MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
529 {
530     U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
531     ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
532     assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
533     return (srcSize >> minlog) + 2;
534 }
535 
536 MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
537 {
538     switch (cctxParams->literalCompressionMode) {
539     case ZSTD_ps_enable:
540         return 0;
541     case ZSTD_ps_disable:
542         return 1;
543     default:
544         assert(0 /* impossible: pre-validated */);
545         ZSTD_FALLTHROUGH;
546     case ZSTD_ps_auto:
547         return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
548     }
549 }
550 
551 /*! ZSTD_safecopyLiterals() :
552  *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
553  *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
554  *  large copies.
555  */
556 static void
557 ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
558 {
559     assert(iend > ilimit_w);
560     if (ip <= ilimit_w) {
561         ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
562         op += ilimit_w - ip;
563         ip = ilimit_w;
564     }
565     while (ip < iend) *op++ = *ip++;
566 }
567 
568 #define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
569 #define STORE_REPCODE_1 STORE_REPCODE(1)
570 #define STORE_REPCODE_2 STORE_REPCODE(2)
571 #define STORE_REPCODE_3 STORE_REPCODE(3)
572 #define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
573 #define STORE_OFFSET(o)  (assert((o)>0), o + ZSTD_REP_MOVE)
574 #define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
575 #define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
576 #define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
577 #define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
578 #define STORED_TO_OFFBASE(o) ((o)+1)
579 #define OFFBASE_TO_STORED(o) ((o)-1)
580 
581 /*! ZSTD_storeSeq() :
582  *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
583  *  @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
584  *  @matchLength : must be >= MINMATCH
585  *  Allowed to overread literals up to litLimit.
586 */
587 HINT_INLINE UNUSED_ATTR void
588 ZSTD_storeSeq(seqStore_t* seqStorePtr,
589               size_t litLength, const BYTE* literals, const BYTE* litLimit,
590               U32 offBase_minus1,
591               size_t matchLength)
592 {
593     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
594     BYTE const* const litEnd = literals + litLength;
595 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
596     static const BYTE* g_start = NULL;
597     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
598     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
599         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
600                pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
601     }
602 #endif
603     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
604     /* copy Literals */
605     assert(seqStorePtr->maxNbLit <= 128 KB);
606     assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
607     assert(literals + litLength <= litLimit);
608     if (litEnd <= litLimit_w) {
609         /* Common case we can use wildcopy.
610 	 * First copy 16 bytes, because literals are likely short.
611 	 */
612         assert(WILDCOPY_OVERLENGTH >= 16);
613         ZSTD_copy16(seqStorePtr->lit, literals);
614         if (litLength > 16) {
615             ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
616         }
617     } else {
618         ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
619     }
620     seqStorePtr->lit += litLength;
621 
622     /* literal Length */
623     if (litLength>0xFFFF) {
624         assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
625         seqStorePtr->longLengthType = ZSTD_llt_literalLength;
626         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
627     }
628     seqStorePtr->sequences[0].litLength = (U16)litLength;
629 
630     /* match offset */
631     seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
632 
633     /* match Length */
634     assert(matchLength >= MINMATCH);
635     {   size_t const mlBase = matchLength - MINMATCH;
636         if (mlBase>0xFFFF) {
637             assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
638             seqStorePtr->longLengthType = ZSTD_llt_matchLength;
639             seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
640         }
641         seqStorePtr->sequences[0].mlBase = (U16)mlBase;
642     }
643 
644     seqStorePtr->sequences++;
645 }
646 
647 /* ZSTD_updateRep() :
648  * updates in-place @rep (array of repeat offsets)
649  * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
650  */
651 MEM_STATIC void
652 ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
653 {
654     if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */
655         rep[2] = rep[1];
656         rep[1] = rep[0];
657         rep[0] = STORED_OFFSET(offBase_minus1);
658     } else {   /* repcode */
659         U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
660         if (repCode > 0) {  /* note : if repCode==0, no change */
661             U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
662             rep[2] = (repCode >= 2) ? rep[1] : rep[2];
663             rep[1] = rep[0];
664             rep[0] = currentOffset;
665         } else {   /* repCode == 0 */
666             /* nothing to do */
667         }
668     }
669 }
670 
671 typedef struct repcodes_s {
672     U32 rep[3];
673 } repcodes_t;
674 
675 MEM_STATIC repcodes_t
676 ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
677 {
678     repcodes_t newReps;
679     ZSTD_memcpy(&newReps, rep, sizeof(newReps));
680     ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
681     return newReps;
682 }
683 
684 
685 /*-*************************************
686 *  Match length counter
687 ***************************************/
688 static unsigned ZSTD_NbCommonBytes (size_t val)
689 {
690     if (MEM_isLittleEndian()) {
691         if (MEM_64bits()) {
692 #       if (__GNUC__ >= 4)
693             return (__builtin_ctzll((U64)val) >> 3);
694 #       else
695             static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
696                                                      0, 3, 1, 3, 1, 4, 2, 7,
697                                                      0, 2, 3, 6, 1, 5, 3, 5,
698                                                      1, 3, 4, 4, 2, 5, 6, 7,
699                                                      7, 0, 1, 2, 3, 3, 4, 6,
700                                                      2, 6, 5, 5, 3, 4, 5, 6,
701                                                      7, 1, 2, 4, 6, 4, 4, 5,
702                                                      7, 2, 6, 5, 7, 6, 7, 7 };
703             return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
704 #       endif
705         } else { /* 32 bits */
706 #       if (__GNUC__ >= 3)
707             return (__builtin_ctz((U32)val) >> 3);
708 #       else
709             static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
710                                                      3, 2, 2, 1, 3, 2, 0, 1,
711                                                      3, 3, 1, 2, 2, 2, 2, 0,
712                                                      3, 1, 2, 0, 1, 0, 1, 1 };
713             return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
714 #       endif
715         }
716     } else {  /* Big Endian CPU */
717         if (MEM_64bits()) {
718 #       if (__GNUC__ >= 4)
719             return (__builtin_clzll(val) >> 3);
720 #       else
721             unsigned r;
722             const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
723             if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
724             if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
725             r += (!val);
726             return r;
727 #       endif
728         } else { /* 32 bits */
729 #       if (__GNUC__ >= 3)
730             return (__builtin_clz((U32)val) >> 3);
731 #       else
732             unsigned r;
733             if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
734             r += (!val);
735             return r;
736 #       endif
737     }   }
738 }
739 
740 
741 MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
742 {
743     const BYTE* const pStart = pIn;
744     const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
745 
746     if (pIn < pInLoopLimit) {
747         { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
748           if (diff) return ZSTD_NbCommonBytes(diff); }
749         pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
750         while (pIn < pInLoopLimit) {
751             size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
752             if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
753             pIn += ZSTD_NbCommonBytes(diff);
754             return (size_t)(pIn - pStart);
755     }   }
756     if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
757     if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
758     if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
759     return (size_t)(pIn - pStart);
760 }
761 
762 /* ZSTD_count_2segments() :
763  *  can count match length with `ip` & `match` in 2 different segments.
764  *  convention : on reaching mEnd, match count continue starting from iStart
765  */
766 MEM_STATIC size_t
767 ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
768                      const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
769 {
770     const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
771     size_t const matchLength = ZSTD_count(ip, match, vEnd);
772     if (match + matchLength != mEnd) return matchLength;
773     DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
774     DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
775     DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
776     DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
777     DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
778     return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
779 }
780 
781 
782 /*-*************************************
783  *  Hashes
784  ***************************************/
785 static const U32 prime3bytes = 506832829U;
786 static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
787 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
788 
789 static const U32 prime4bytes = 2654435761U;
790 static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
791 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
792 
793 static const U64 prime5bytes = 889523592379ULL;
794 static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
795 static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
796 
797 static const U64 prime6bytes = 227718039650203ULL;
798 static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
799 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
800 
801 static const U64 prime7bytes = 58295818150454627ULL;
802 static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
803 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
804 
805 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
806 static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
807 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
808 
809 MEM_STATIC FORCE_INLINE_ATTR
810 size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
811 {
812     switch(mls)
813     {
814     default:
815     case 4: return ZSTD_hash4Ptr(p, hBits);
816     case 5: return ZSTD_hash5Ptr(p, hBits);
817     case 6: return ZSTD_hash6Ptr(p, hBits);
818     case 7: return ZSTD_hash7Ptr(p, hBits);
819     case 8: return ZSTD_hash8Ptr(p, hBits);
820     }
821 }
822 
823 /* ZSTD_ipow() :
824  * Return base^exponent.
825  */
826 static U64 ZSTD_ipow(U64 base, U64 exponent)
827 {
828     U64 power = 1;
829     while (exponent) {
830       if (exponent & 1) power *= base;
831       exponent >>= 1;
832       base *= base;
833     }
834     return power;
835 }
836 
837 #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
838 
839 /* ZSTD_rollingHash_append() :
840  * Add the buffer to the hash value.
841  */
842 static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
843 {
844     BYTE const* istart = (BYTE const*)buf;
845     size_t pos;
846     for (pos = 0; pos < size; ++pos) {
847         hash *= prime8bytes;
848         hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
849     }
850     return hash;
851 }
852 
853 /* ZSTD_rollingHash_compute() :
854  * Compute the rolling hash value of the buffer.
855  */
856 MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
857 {
858     return ZSTD_rollingHash_append(0, buf, size);
859 }
860 
861 /* ZSTD_rollingHash_primePower() :
862  * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
863  * over a window of length bytes.
864  */
865 MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
866 {
867     return ZSTD_ipow(prime8bytes, length - 1);
868 }
869 
870 /* ZSTD_rollingHash_rotate() :
871  * Rotate the rolling hash by one byte.
872  */
873 MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
874 {
875     hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
876     hash *= prime8bytes;
877     hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
878     return hash;
879 }
880 
881 /*-*************************************
882 *  Round buffer management
883 ***************************************/
884 #if (ZSTD_WINDOWLOG_MAX_64 > 31)
885 # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
886 #endif
887 /* Max current allowed */
888 #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
889 /* Maximum chunk size before overflow correction needs to be called again */
890 #define ZSTD_CHUNKSIZE_MAX                                                     \
891     ( ((U32)-1)                  /* Maximum ending current index */            \
892     - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
893 
894 /*
895  * ZSTD_window_clear():
896  * Clears the window containing the history by simply setting it to empty.
897  */
898 MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
899 {
900     size_t const endT = (size_t)(window->nextSrc - window->base);
901     U32 const end = (U32)endT;
902 
903     window->lowLimit = end;
904     window->dictLimit = end;
905 }
906 
907 MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
908 {
909     return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
910            window.lowLimit == ZSTD_WINDOW_START_INDEX &&
911            (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
912 }
913 
914 /*
915  * ZSTD_window_hasExtDict():
916  * Returns non-zero if the window has a non-empty extDict.
917  */
918 MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
919 {
920     return window.lowLimit < window.dictLimit;
921 }
922 
923 /*
924  * ZSTD_matchState_dictMode():
925  * Inspects the provided matchState and figures out what dictMode should be
926  * passed to the compressor.
927  */
928 MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
929 {
930     return ZSTD_window_hasExtDict(ms->window) ?
931         ZSTD_extDict :
932         ms->dictMatchState != NULL ?
933             (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
934             ZSTD_noDict;
935 }
936 
937 /* Defining this macro to non-zero tells zstd to run the overflow correction
938  * code much more frequently. This is very inefficient, and should only be
939  * used for tests and fuzzers.
940  */
941 #ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
942 #  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
943 #    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
944 #  else
945 #    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
946 #  endif
947 #endif
948 
949 /*
950  * ZSTD_window_canOverflowCorrect():
951  * Returns non-zero if the indices are large enough for overflow correction
952  * to work correctly without impacting compression ratio.
953  */
954 MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
955                                               U32 cycleLog,
956                                               U32 maxDist,
957                                               U32 loadedDictEnd,
958                                               void const* src)
959 {
960     U32 const cycleSize = 1u << cycleLog;
961     U32 const curr = (U32)((BYTE const*)src - window.base);
962     U32 const minIndexToOverflowCorrect = cycleSize
963                                         + MAX(maxDist, cycleSize)
964                                         + ZSTD_WINDOW_START_INDEX;
965 
966     /* Adjust the min index to backoff the overflow correction frequency,
967      * so we don't waste too much CPU in overflow correction. If this
968      * computation overflows we don't really care, we just need to make
969      * sure it is at least minIndexToOverflowCorrect.
970      */
971     U32 const adjustment = window.nbOverflowCorrections + 1;
972     U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
973                                   minIndexToOverflowCorrect);
974     U32 const indexLargeEnough = curr > adjustedIndex;
975 
976     /* Only overflow correct early if the dictionary is invalidated already,
977      * so we don't hurt compression ratio.
978      */
979     U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
980 
981     return indexLargeEnough && dictionaryInvalidated;
982 }
983 
984 /*
985  * ZSTD_window_needOverflowCorrection():
986  * Returns non-zero if the indices are getting too large and need overflow
987  * protection.
988  */
989 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
990                                                   U32 cycleLog,
991                                                   U32 maxDist,
992                                                   U32 loadedDictEnd,
993                                                   void const* src,
994                                                   void const* srcEnd)
995 {
996     U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
997     if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
998         if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
999             return 1;
1000         }
1001     }
1002     return curr > ZSTD_CURRENT_MAX;
1003 }
1004 
1005 /*
1006  * ZSTD_window_correctOverflow():
1007  * Reduces the indices to protect from index overflow.
1008  * Returns the correction made to the indices, which must be applied to every
1009  * stored index.
1010  *
1011  * The least significant cycleLog bits of the indices must remain the same,
1012  * which may be 0. Every index up to maxDist in the past must be valid.
1013  */
1014 MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
1015                                            U32 maxDist, void const* src)
1016 {
1017     /* preemptive overflow correction:
1018      * 1. correction is large enough:
1019      *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
1020      *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
1021      *
1022      *    current - newCurrent
1023      *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
1024      *    > (3<<29) - (1<<chainLog)
1025      *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
1026      *    > 1<<29
1027      *
1028      * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
1029      *    After correction, current is less than (1<<chainLog + 1<<windowLog).
1030      *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
1031      *    In 32-bit mode we are safe, because (chainLog <= 29), so
1032      *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
1033      * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
1034      *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
1035      */
1036     U32 const cycleSize = 1u << cycleLog;
1037     U32 const cycleMask = cycleSize - 1;
1038     U32 const curr = (U32)((BYTE const*)src - window->base);
1039     U32 const currentCycle = curr & cycleMask;
1040     /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
1041     U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
1042                                      ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
1043                                      : 0;
1044     U32 const newCurrent = currentCycle
1045                          + currentCycleCorrection
1046                          + MAX(maxDist, cycleSize);
1047     U32 const correction = curr - newCurrent;
1048     /* maxDist must be a power of two so that:
1049      *   (newCurrent & cycleMask) == (curr & cycleMask)
1050      * This is required to not corrupt the chains / binary tree.
1051      */
1052     assert((maxDist & (maxDist - 1)) == 0);
1053     assert((curr & cycleMask) == (newCurrent & cycleMask));
1054     assert(curr > newCurrent);
1055     if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1056         /* Loose bound, should be around 1<<29 (see above) */
1057         assert(correction > 1<<28);
1058     }
1059 
1060     window->base += correction;
1061     window->dictBase += correction;
1062     if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
1063         window->lowLimit = ZSTD_WINDOW_START_INDEX;
1064     } else {
1065         window->lowLimit -= correction;
1066     }
1067     if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
1068         window->dictLimit = ZSTD_WINDOW_START_INDEX;
1069     } else {
1070         window->dictLimit -= correction;
1071     }
1072 
1073     /* Ensure we can still reference the full window. */
1074     assert(newCurrent >= maxDist);
1075     assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
1076     /* Ensure that lowLimit and dictLimit didn't underflow. */
1077     assert(window->lowLimit <= newCurrent);
1078     assert(window->dictLimit <= newCurrent);
1079 
1080     ++window->nbOverflowCorrections;
1081 
1082     DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
1083              window->lowLimit);
1084     return correction;
1085 }
1086 
1087 /*
1088  * ZSTD_window_enforceMaxDist():
1089  * Updates lowLimit so that:
1090  *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
1091  *
1092  * It ensures index is valid as long as index >= lowLimit.
1093  * This must be called before a block compression call.
1094  *
1095  * loadedDictEnd is only defined if a dictionary is in use for current compression.
1096  * As the name implies, loadedDictEnd represents the index at end of dictionary.
1097  * The value lies within context's referential, it can be directly compared to blockEndIdx.
1098  *
1099  * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
1100  * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
1101  * This is because dictionaries are allowed to be referenced fully
1102  * as long as the last byte of the dictionary is in the window.
1103  * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
1104  *
1105  * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
1106  * In dictMatchState mode, lowLimit and dictLimit are the same,
1107  * and the dictionary is below them.
1108  * forceWindow and dictMatchState are therefore incompatible.
1109  */
1110 MEM_STATIC void
1111 ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
1112                      const void* blockEnd,
1113                            U32   maxDist,
1114                            U32*  loadedDictEndPtr,
1115                      const ZSTD_matchState_t** dictMatchStatePtr)
1116 {
1117     U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1118     U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
1119     DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1120                 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1121 
1122     /* - When there is no dictionary : loadedDictEnd == 0.
1123          In which case, the test (blockEndIdx > maxDist) is merely to avoid
1124          overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
1125        - When there is a standard dictionary :
1126          Index referential is copied from the dictionary,
1127          which means it starts from 0.
1128          In which case, loadedDictEnd == dictSize,
1129          and it makes sense to compare `blockEndIdx > maxDist + dictSize`
1130          since `blockEndIdx` also starts from zero.
1131        - When there is an attached dictionary :
1132          loadedDictEnd is expressed within the referential of the context,
1133          so it can be directly compared against blockEndIdx.
1134     */
1135     if (blockEndIdx > maxDist + loadedDictEnd) {
1136         U32 const newLowLimit = blockEndIdx - maxDist;
1137         if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
1138         if (window->dictLimit < window->lowLimit) {
1139             DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
1140                         (unsigned)window->dictLimit, (unsigned)window->lowLimit);
1141             window->dictLimit = window->lowLimit;
1142         }
1143         /* On reaching window size, dictionaries are invalidated */
1144         if (loadedDictEndPtr) *loadedDictEndPtr = 0;
1145         if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
1146     }
1147 }
1148 
1149 /* Similar to ZSTD_window_enforceMaxDist(),
1150  * but only invalidates dictionary
1151  * when input progresses beyond window size.
1152  * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
1153  *              loadedDictEnd uses same referential as window->base
1154  *              maxDist is the window size */
1155 MEM_STATIC void
1156 ZSTD_checkDictValidity(const ZSTD_window_t* window,
1157                        const void* blockEnd,
1158                              U32   maxDist,
1159                              U32*  loadedDictEndPtr,
1160                        const ZSTD_matchState_t** dictMatchStatePtr)
1161 {
1162     assert(loadedDictEndPtr != NULL);
1163     assert(dictMatchStatePtr != NULL);
1164     {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1165         U32 const loadedDictEnd = *loadedDictEndPtr;
1166         DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1167                     (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1168         assert(blockEndIdx >= loadedDictEnd);
1169 
1170         if (blockEndIdx > loadedDictEnd + maxDist) {
1171             /* On reaching window size, dictionaries are invalidated.
1172              * For simplification, if window size is reached anywhere within next block,
1173              * the dictionary is invalidated for the full block.
1174              */
1175             DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
1176             *loadedDictEndPtr = 0;
1177             *dictMatchStatePtr = NULL;
1178         } else {
1179             if (*loadedDictEndPtr != 0) {
1180                 DEBUGLOG(6, "dictionary considered valid for current block");
1181     }   }   }
1182 }
1183 
1184 MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
1185     ZSTD_memset(window, 0, sizeof(*window));
1186     window->base = (BYTE const*)" ";
1187     window->dictBase = (BYTE const*)" ";
1188     ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
1189     window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
1190     window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
1191     window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
1192     window->nbOverflowCorrections = 0;
1193 }
1194 
1195 /*
1196  * ZSTD_window_update():
1197  * Updates the window by appending [src, src + srcSize) to the window.
1198  * If it is not contiguous, the current prefix becomes the extDict, and we
1199  * forget about the extDict. Handles overlap of the prefix and extDict.
1200  * Returns non-zero if the segment is contiguous.
1201  */
1202 MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
1203                                   void const* src, size_t srcSize,
1204                                   int forceNonContiguous)
1205 {
1206     BYTE const* const ip = (BYTE const*)src;
1207     U32 contiguous = 1;
1208     DEBUGLOG(5, "ZSTD_window_update");
1209     if (srcSize == 0)
1210         return contiguous;
1211     assert(window->base != NULL);
1212     assert(window->dictBase != NULL);
1213     /* Check if blocks follow each other */
1214     if (src != window->nextSrc || forceNonContiguous) {
1215         /* not contiguous */
1216         size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
1217         DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
1218         window->lowLimit = window->dictLimit;
1219         assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
1220         window->dictLimit = (U32)distanceFromBase;
1221         window->dictBase = window->base;
1222         window->base = ip - distanceFromBase;
1223         /* ms->nextToUpdate = window->dictLimit; */
1224         if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
1225         contiguous = 0;
1226     }
1227     window->nextSrc = ip + srcSize;
1228     /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
1229     if ( (ip+srcSize > window->dictBase + window->lowLimit)
1230        & (ip < window->dictBase + window->dictLimit)) {
1231         ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
1232         U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
1233         window->lowLimit = lowLimitMax;
1234         DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
1235     }
1236     return contiguous;
1237 }
1238 
1239 /*
1240  * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
1241  */
1242 MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
1243 {
1244     U32 const maxDistance = 1U << windowLog;
1245     U32 const lowestValid = ms->window.lowLimit;
1246     U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1247     U32 const isDictionary = (ms->loadedDictEnd != 0);
1248     /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
1249      * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
1250      * valid for the entire block. So this check is sufficient to find the lowest valid match index.
1251      */
1252     U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1253     return matchLowest;
1254 }
1255 
1256 /*
1257  * Returns the lowest allowed match index in the prefix.
1258  */
1259 MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
1260 {
1261     U32    const maxDistance = 1U << windowLog;
1262     U32    const lowestValid = ms->window.dictLimit;
1263     U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1264     U32    const isDictionary = (ms->loadedDictEnd != 0);
1265     /* When computing the lowest prefix index we need to take the dictionary into account to handle
1266      * the edge case where the dictionary and the source are contiguous in memory.
1267      */
1268     U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
1269     return matchLowest;
1270 }
1271 
1272 
1273 
1274 /* debug functions */
1275 #if (DEBUGLEVEL>=2)
1276 
1277 MEM_STATIC double ZSTD_fWeight(U32 rawStat)
1278 {
1279     U32 const fp_accuracy = 8;
1280     U32 const fp_multiplier = (1 << fp_accuracy);
1281     U32 const newStat = rawStat + 1;
1282     U32 const hb = ZSTD_highbit32(newStat);
1283     U32 const BWeight = hb * fp_multiplier;
1284     U32 const FWeight = (newStat << fp_accuracy) >> hb;
1285     U32 const weight = BWeight + FWeight;
1286     assert(hb + fp_accuracy < 31);
1287     return (double)weight / fp_multiplier;
1288 }
1289 
1290 /* display a table content,
1291  * listing each element, its frequency, and its predicted bit cost */
1292 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
1293 {
1294     unsigned u, sum;
1295     for (u=0, sum=0; u<=max; u++) sum += table[u];
1296     DEBUGLOG(2, "total nb elts: %u", sum);
1297     for (u=0; u<=max; u++) {
1298         DEBUGLOG(2, "%2u: %5u  (%.2f)",
1299                 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
1300     }
1301 }
1302 
1303 #endif
1304 
1305 
1306 
1307 /* ===============================================================
1308  * Shared internal declarations
1309  * These prototypes may be called from sources not in lib/compress
1310  * =============================================================== */
1311 
1312 /* ZSTD_loadCEntropy() :
1313  * dict : must point at beginning of a valid zstd dictionary.
1314  * return : size of dictionary header (size of magic number + dict ID + entropy tables)
1315  * assumptions : magic number supposed already checked
1316  *               and dictSize >= 8 */
1317 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
1318                          const void* const dict, size_t dictSize);
1319 
1320 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
1321 
1322 /* ==============================================================
1323  * Private declarations
1324  * These prototypes shall only be called from within lib/compress
1325  * ============================================================== */
1326 
1327 /* ZSTD_getCParamsFromCCtxParams() :
1328  * cParams are built depending on compressionLevel, src size hints,
1329  * LDM and manually set compression parameters.
1330  * Note: srcSizeHint == 0 means 0!
1331  */
1332 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
1333         const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
1334 
1335 /*! ZSTD_initCStream_internal() :
1336  *  Private use only. Init streaming operation.
1337  *  expects params to be valid.
1338  *  must receive dict, or cdict, or none, but not both.
1339  *  @return : 0, or an error code */
1340 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
1341                      const void* dict, size_t dictSize,
1342                      const ZSTD_CDict* cdict,
1343                      const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
1344 
1345 void ZSTD_resetSeqStore(seqStore_t* ssPtr);
1346 
1347 /*! ZSTD_getCParamsFromCDict() :
1348  *  as the name implies */
1349 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
1350 
1351 /* ZSTD_compressBegin_advanced_internal() :
1352  * Private use only. To be called from zstdmt_compress.c. */
1353 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
1354                                     const void* dict, size_t dictSize,
1355                                     ZSTD_dictContentType_e dictContentType,
1356                                     ZSTD_dictTableLoadMethod_e dtlm,
1357                                     const ZSTD_CDict* cdict,
1358                                     const ZSTD_CCtx_params* params,
1359                                     unsigned long long pledgedSrcSize);
1360 
1361 /* ZSTD_compress_advanced_internal() :
1362  * Private use only. To be called from zstdmt_compress.c. */
1363 size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
1364                                        void* dst, size_t dstCapacity,
1365                                  const void* src, size_t srcSize,
1366                                  const void* dict,size_t dictSize,
1367                                  const ZSTD_CCtx_params* params);
1368 
1369 
1370 /* ZSTD_writeLastEmptyBlock() :
1371  * output an empty Block with end-of-frame mark to complete a frame
1372  * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
1373  *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
1374  */
1375 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
1376 
1377 
1378 /* ZSTD_referenceExternalSequences() :
1379  * Must be called before starting a compression operation.
1380  * seqs must parse a prefix of the source.
1381  * This cannot be used when long range matching is enabled.
1382  * Zstd will use these sequences, and pass the literals to a secondary block
1383  * compressor.
1384  * @return : An error code on failure.
1385  * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
1386  * access and data corruption.
1387  */
1388 size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
1389 
1390 /* ZSTD_cycleLog() :
1391  *  condition for correct operation : hashLog > 1 */
1392 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
1393 
1394 /* ZSTD_CCtx_trace() :
1395  *  Trace the end of a compression call.
1396  */
1397 void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
1398 
1399 #endif /* ZSTD_COMPRESS_H */
1400