1 /*
2 * Copyright (c) Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /* This header contains definitions
12 * that shall **only** be used by modules within lib/compress.
13 */
14
15 #ifndef ZSTD_COMPRESS_H
16 #define ZSTD_COMPRESS_H
17
18 /*-*************************************
19 * Dependencies
20 ***************************************/
21 #include "../common/zstd_internal.h"
22 #include "zstd_cwksp.h"
23
24
25 /*-*************************************
26 * Constants
27 ***************************************/
28 #define kSearchStrength 8
29 #define HASH_READ_SIZE 8
30 #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
31 It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
32 It's not a big deal though : candidate will just be sorted again.
33 Additionally, candidate position 1 will be lost.
34 But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
35 The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
36 This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
37
38
39 /*-*************************************
40 * Context memory management
41 ***************************************/
42 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
43 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
44
45 typedef struct ZSTD_prefixDict_s {
46 const void* dict;
47 size_t dictSize;
48 ZSTD_dictContentType_e dictContentType;
49 } ZSTD_prefixDict;
50
51 typedef struct {
52 void* dictBuffer;
53 void const* dict;
54 size_t dictSize;
55 ZSTD_dictContentType_e dictContentType;
56 ZSTD_CDict* cdict;
57 } ZSTD_localDict;
58
59 typedef struct {
60 HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
61 HUF_repeat repeatMode;
62 } ZSTD_hufCTables_t;
63
64 typedef struct {
65 FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
66 FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
67 FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
68 FSE_repeat offcode_repeatMode;
69 FSE_repeat matchlength_repeatMode;
70 FSE_repeat litlength_repeatMode;
71 } ZSTD_fseCTables_t;
72
73 typedef struct {
74 ZSTD_hufCTables_t huf;
75 ZSTD_fseCTables_t fse;
76 } ZSTD_entropyCTables_t;
77
78 /* *********************************************
79 * Entropy buffer statistics structs and funcs *
80 ***********************************************/
81 /* ZSTD_hufCTablesMetadata_t :
82 * Stores Literals Block Type for a super-block in hType, and
83 * huffman tree description in hufDesBuffer.
84 * hufDesSize refers to the size of huffman tree description in bytes.
85 * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
86 typedef struct {
87 symbolEncodingType_e hType;
88 BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
89 size_t hufDesSize;
90 } ZSTD_hufCTablesMetadata_t;
91
92 /* ZSTD_fseCTablesMetadata_t :
93 * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
94 * fse tables in fseTablesBuffer.
95 * fseTablesSize refers to the size of fse tables in bytes.
96 * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
97 typedef struct {
98 symbolEncodingType_e llType;
99 symbolEncodingType_e ofType;
100 symbolEncodingType_e mlType;
101 BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
102 size_t fseTablesSize;
103 size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
104 } ZSTD_fseCTablesMetadata_t;
105
106 typedef struct {
107 ZSTD_hufCTablesMetadata_t hufMetadata;
108 ZSTD_fseCTablesMetadata_t fseMetadata;
109 } ZSTD_entropyCTablesMetadata_t;
110
111 /* ZSTD_buildBlockEntropyStats() :
112 * Builds entropy for the block.
113 * @return : 0 on success or error code */
114 size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
115 const ZSTD_entropyCTables_t* prevEntropy,
116 ZSTD_entropyCTables_t* nextEntropy,
117 const ZSTD_CCtx_params* cctxParams,
118 ZSTD_entropyCTablesMetadata_t* entropyMetadata,
119 void* workspace, size_t wkspSize);
120
121 /* *******************************
122 * Compression internals structs *
123 *********************************/
124
125 typedef struct {
126 U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
127 U32 len; /* Raw length of match */
128 } ZSTD_match_t;
129
130 typedef struct {
131 U32 offset; /* Offset of sequence */
132 U32 litLength; /* Length of literals prior to match */
133 U32 matchLength; /* Raw length of match */
134 } rawSeq;
135
136 typedef struct {
137 rawSeq* seq; /* The start of the sequences */
138 size_t pos; /* The index in seq where reading stopped. pos <= size. */
139 size_t posInSequence; /* The position within the sequence at seq[pos] where reading
140 stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
141 size_t size; /* The number of sequences. <= capacity. */
142 size_t capacity; /* The capacity starting from `seq` pointer */
143 } rawSeqStore_t;
144
145 UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
146
147 typedef struct {
148 int price;
149 U32 off;
150 U32 mlen;
151 U32 litlen;
152 U32 rep[ZSTD_REP_NUM];
153 } ZSTD_optimal_t;
154
155 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
156
157 typedef struct {
158 /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
159 unsigned* litFreq; /* table of literals statistics, of size 256 */
160 unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
161 unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
162 unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
163 ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
164 ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
165
166 U32 litSum; /* nb of literals */
167 U32 litLengthSum; /* nb of litLength codes */
168 U32 matchLengthSum; /* nb of matchLength codes */
169 U32 offCodeSum; /* nb of offset codes */
170 U32 litSumBasePrice; /* to compare to log2(litfreq) */
171 U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */
172 U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */
173 U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
174 ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
175 const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
176 ZSTD_paramSwitch_e literalCompressionMode;
177 } optState_t;
178
179 typedef struct {
180 ZSTD_entropyCTables_t entropy;
181 U32 rep[ZSTD_REP_NUM];
182 } ZSTD_compressedBlockState_t;
183
184 typedef struct {
185 BYTE const* nextSrc; /* next block here to continue on current prefix */
186 BYTE const* base; /* All regular indexes relative to this position */
187 BYTE const* dictBase; /* extDict indexes relative to this position */
188 U32 dictLimit; /* below that point, need extDict */
189 U32 lowLimit; /* below that point, no more valid data */
190 U32 nbOverflowCorrections; /* Number of times overflow correction has run since
191 * ZSTD_window_init(). Useful for debugging coredumps
192 * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
193 */
194 } ZSTD_window_t;
195
196 #define ZSTD_WINDOW_START_INDEX 2
197
198 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
199
200 #define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
201
202 struct ZSTD_matchState_t {
203 ZSTD_window_t window; /* State for window round buffer management */
204 U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
205 * When loadedDictEnd != 0, a dictionary is in use, and still valid.
206 * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
207 * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
208 * When dict referential is copied into active context (i.e. not attached),
209 * loadedDictEnd == dictSize, since referential starts from zero.
210 */
211 U32 nextToUpdate; /* index from which to continue table update */
212 U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
213
214 U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
215 U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
216 U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
217
218 U32* hashTable;
219 U32* hashTable3;
220 U32* chainTable;
221
222 U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
223
224 int dedicatedDictSearch; /* Indicates whether this matchState is using the
225 * dedicated dictionary search structure.
226 */
227 optState_t opt; /* optimal parser state */
228 const ZSTD_matchState_t* dictMatchState;
229 ZSTD_compressionParameters cParams;
230 const rawSeqStore_t* ldmSeqStore;
231 };
232
233 typedef struct {
234 ZSTD_compressedBlockState_t* prevCBlock;
235 ZSTD_compressedBlockState_t* nextCBlock;
236 ZSTD_matchState_t matchState;
237 } ZSTD_blockState_t;
238
239 typedef struct {
240 U32 offset;
241 U32 checksum;
242 } ldmEntry_t;
243
244 typedef struct {
245 BYTE const* split;
246 U32 hash;
247 U32 checksum;
248 ldmEntry_t* bucket;
249 } ldmMatchCandidate_t;
250
251 #define LDM_BATCH_SIZE 64
252
253 typedef struct {
254 ZSTD_window_t window; /* State for the window round buffer management */
255 ldmEntry_t* hashTable;
256 U32 loadedDictEnd;
257 BYTE* bucketOffsets; /* Next position in bucket to insert entry */
258 size_t splitIndices[LDM_BATCH_SIZE];
259 ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
260 } ldmState_t;
261
262 typedef struct {
263 ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
264 U32 hashLog; /* Log size of hashTable */
265 U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
266 U32 minMatchLength; /* Minimum match length */
267 U32 hashRateLog; /* Log number of entries to skip */
268 U32 windowLog; /* Window log for the LDM */
269 } ldmParams_t;
270
271 typedef struct {
272 int collectSequences;
273 ZSTD_Sequence* seqStart;
274 size_t seqIndex;
275 size_t maxSequences;
276 } SeqCollector;
277
278 struct ZSTD_CCtx_params_s {
279 ZSTD_format_e format;
280 ZSTD_compressionParameters cParams;
281 ZSTD_frameParameters fParams;
282
283 int compressionLevel;
284 int forceWindow; /* force back-references to respect limit of
285 * 1<<wLog, even for dictionary */
286 size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
287 * No target when targetCBlockSize == 0.
288 * There is no guarantee on compressed block size */
289 int srcSizeHint; /* User's best guess of source size.
290 * Hint is not valid when srcSizeHint == 0.
291 * There is no guarantee that hint is close to actual source size */
292
293 ZSTD_dictAttachPref_e attachDictPref;
294 ZSTD_paramSwitch_e literalCompressionMode;
295
296 /* Multithreading: used to pass parameters to mtctx */
297 int nbWorkers;
298 size_t jobSize;
299 int overlapLog;
300 int rsyncable;
301
302 /* Long distance matching parameters */
303 ldmParams_t ldmParams;
304
305 /* Dedicated dict search algorithm trigger */
306 int enableDedicatedDictSearch;
307
308 /* Input/output buffer modes */
309 ZSTD_bufferMode_e inBufferMode;
310 ZSTD_bufferMode_e outBufferMode;
311
312 /* Sequence compression API */
313 ZSTD_sequenceFormat_e blockDelimiters;
314 int validateSequences;
315
316 /* Block splitting */
317 ZSTD_paramSwitch_e useBlockSplitter;
318
319 /* Param for deciding whether to use row-based matchfinder */
320 ZSTD_paramSwitch_e useRowMatchFinder;
321
322 /* Always load a dictionary in ext-dict mode (not prefix mode)? */
323 int deterministicRefPrefix;
324
325 /* Internal use, for createCCtxParams() and freeCCtxParams() only */
326 ZSTD_customMem customMem;
327 }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
328
329 #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
330 #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
331
332 /*
333 * Indicates whether this compression proceeds directly from user-provided
334 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
335 * whether the context needs to buffer the input/output (ZSTDb_buffered).
336 */
337 typedef enum {
338 ZSTDb_not_buffered,
339 ZSTDb_buffered
340 } ZSTD_buffered_policy_e;
341
342 /*
343 * Struct that contains all elements of block splitter that should be allocated
344 * in a wksp.
345 */
346 #define ZSTD_MAX_NB_BLOCK_SPLITS 196
347 typedef struct {
348 seqStore_t fullSeqStoreChunk;
349 seqStore_t firstHalfSeqStore;
350 seqStore_t secondHalfSeqStore;
351 seqStore_t currSeqStore;
352 seqStore_t nextSeqStore;
353
354 U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
355 ZSTD_entropyCTablesMetadata_t entropyMetadata;
356 } ZSTD_blockSplitCtx;
357
358 struct ZSTD_CCtx_s {
359 ZSTD_compressionStage_e stage;
360 int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
361 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
362 ZSTD_CCtx_params requestedParams;
363 ZSTD_CCtx_params appliedParams;
364 ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
365 U32 dictID;
366 size_t dictContentSize;
367
368 ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
369 size_t blockSize;
370 unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
371 unsigned long long consumedSrcSize;
372 unsigned long long producedCSize;
373 struct xxh64_state xxhState;
374 ZSTD_customMem customMem;
375 ZSTD_threadPool* pool;
376 size_t staticSize;
377 SeqCollector seqCollector;
378 int isFirstBlock;
379 int initialized;
380
381 seqStore_t seqStore; /* sequences storage ptrs */
382 ldmState_t ldmState; /* long distance matching state */
383 rawSeq* ldmSequences; /* Storage for the ldm output sequences */
384 size_t maxNbLdmSequences;
385 rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
386 ZSTD_blockState_t blockState;
387 U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
388
389 /* Whether we are streaming or not */
390 ZSTD_buffered_policy_e bufferedPolicy;
391
392 /* streaming */
393 char* inBuff;
394 size_t inBuffSize;
395 size_t inToCompress;
396 size_t inBuffPos;
397 size_t inBuffTarget;
398 char* outBuff;
399 size_t outBuffSize;
400 size_t outBuffContentSize;
401 size_t outBuffFlushedSize;
402 ZSTD_cStreamStage streamStage;
403 U32 frameEnded;
404
405 /* Stable in/out buffer verification */
406 ZSTD_inBuffer expectedInBuffer;
407 size_t expectedOutBufferSize;
408
409 /* Dictionary */
410 ZSTD_localDict localDict;
411 const ZSTD_CDict* cdict;
412 ZSTD_prefixDict prefixDict; /* single-usage dictionary */
413
414 /* Multi-threading */
415
416 /* Tracing */
417
418 /* Workspace for block splitter */
419 ZSTD_blockSplitCtx blockSplitCtx;
420 };
421
422 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
423
424 typedef enum {
425 ZSTD_noDict = 0,
426 ZSTD_extDict = 1,
427 ZSTD_dictMatchState = 2,
428 ZSTD_dedicatedDictSearch = 3
429 } ZSTD_dictMode_e;
430
431 typedef enum {
432 ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict.
433 * In this mode we use both the srcSize and the dictSize
434 * when selecting and adjusting parameters.
435 */
436 ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
437 * In this mode we only take the srcSize into account when selecting
438 * and adjusting parameters.
439 */
440 ZSTD_cpm_createCDict = 2, /* Creating a CDict.
441 * In this mode we take both the source size and the dictionary size
442 * into account when selecting and adjusting the parameters.
443 */
444 ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
445 * We don't know what these parameters are for. We default to the legacy
446 * behavior of taking both the source size and the dict size into account
447 * when selecting and adjusting parameters.
448 */
449 } ZSTD_cParamMode_e;
450
451 typedef size_t (*ZSTD_blockCompressor) (
452 ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
453 void const* src, size_t srcSize);
454 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
455
456
ZSTD_LLcode(U32 litLength)457 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
458 {
459 static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
460 8, 9, 10, 11, 12, 13, 14, 15,
461 16, 16, 17, 17, 18, 18, 19, 19,
462 20, 20, 20, 20, 21, 21, 21, 21,
463 22, 22, 22, 22, 22, 22, 22, 22,
464 23, 23, 23, 23, 23, 23, 23, 23,
465 24, 24, 24, 24, 24, 24, 24, 24,
466 24, 24, 24, 24, 24, 24, 24, 24 };
467 static const U32 LL_deltaCode = 19;
468 return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
469 }
470
471 /* ZSTD_MLcode() :
472 * note : mlBase = matchLength - MINMATCH;
473 * because it's the format it's stored in seqStore->sequences */
ZSTD_MLcode(U32 mlBase)474 MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
475 {
476 static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
477 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
478 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
479 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
480 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
481 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
482 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
483 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
484 static const U32 ML_deltaCode = 36;
485 return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
486 }
487
488 /* ZSTD_cParam_withinBounds:
489 * @return 1 if value is within cParam bounds,
490 * 0 otherwise */
ZSTD_cParam_withinBounds(ZSTD_cParameter cParam,int value)491 MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
492 {
493 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
494 if (ZSTD_isError(bounds.error)) return 0;
495 if (value < bounds.lowerBound) return 0;
496 if (value > bounds.upperBound) return 0;
497 return 1;
498 }
499
500 /* ZSTD_noCompressBlock() :
501 * Writes uncompressed block to dst buffer from given src.
502 * Returns the size of the block */
ZSTD_noCompressBlock(void * dst,size_t dstCapacity,const void * src,size_t srcSize,U32 lastBlock)503 MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
504 {
505 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
506 RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
507 dstSize_tooSmall, "dst buf too small for uncompressed block");
508 MEM_writeLE24(dst, cBlockHeader24);
509 ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
510 return ZSTD_blockHeaderSize + srcSize;
511 }
512
ZSTD_rleCompressBlock(void * dst,size_t dstCapacity,BYTE src,size_t srcSize,U32 lastBlock)513 MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
514 {
515 BYTE* const op = (BYTE*)dst;
516 U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
517 RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
518 MEM_writeLE24(op, cBlockHeader);
519 op[3] = src;
520 return 4;
521 }
522
523
524 /* ZSTD_minGain() :
525 * minimum compression required
526 * to generate a compress block or a compressed literals section.
527 * note : use same formula for both situations */
ZSTD_minGain(size_t srcSize,ZSTD_strategy strat)528 MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
529 {
530 U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
531 ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
532 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
533 return (srcSize >> minlog) + 2;
534 }
535
ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params * cctxParams)536 MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
537 {
538 switch (cctxParams->literalCompressionMode) {
539 case ZSTD_ps_enable:
540 return 0;
541 case ZSTD_ps_disable:
542 return 1;
543 default:
544 assert(0 /* impossible: pre-validated */);
545 ZSTD_FALLTHROUGH;
546 case ZSTD_ps_auto:
547 return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
548 }
549 }
550
551 /*! ZSTD_safecopyLiterals() :
552 * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
553 * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
554 * large copies.
555 */
556 static void
ZSTD_safecopyLiterals(BYTE * op,BYTE const * ip,BYTE const * const iend,BYTE const * ilimit_w)557 ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
558 {
559 assert(iend > ilimit_w);
560 if (ip <= ilimit_w) {
561 ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
562 op += ilimit_w - ip;
563 ip = ilimit_w;
564 }
565 while (ip < iend) *op++ = *ip++;
566 }
567
568 #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
569 #define STORE_REPCODE_1 STORE_REPCODE(1)
570 #define STORE_REPCODE_2 STORE_REPCODE(2)
571 #define STORE_REPCODE_3 STORE_REPCODE(3)
572 #define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
573 #define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE)
574 #define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE)
575 #define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
576 #define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
577 #define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */
578 #define STORED_TO_OFFBASE(o) ((o)+1)
579 #define OFFBASE_TO_STORED(o) ((o)-1)
580
581 /*! ZSTD_storeSeq() :
582 * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
583 * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
584 * @matchLength : must be >= MINMATCH
585 * Allowed to overread literals up to litLimit.
586 */
587 HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t * seqStorePtr,size_t litLength,const BYTE * literals,const BYTE * litLimit,U32 offBase_minus1,size_t matchLength)588 ZSTD_storeSeq(seqStore_t* seqStorePtr,
589 size_t litLength, const BYTE* literals, const BYTE* litLimit,
590 U32 offBase_minus1,
591 size_t matchLength)
592 {
593 BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
594 BYTE const* const litEnd = literals + litLength;
595 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
596 static const BYTE* g_start = NULL;
597 if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
598 { U32 const pos = (U32)((const BYTE*)literals - g_start);
599 DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
600 pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
601 }
602 #endif
603 assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
604 /* copy Literals */
605 assert(seqStorePtr->maxNbLit <= 128 KB);
606 assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
607 assert(literals + litLength <= litLimit);
608 if (litEnd <= litLimit_w) {
609 /* Common case we can use wildcopy.
610 * First copy 16 bytes, because literals are likely short.
611 */
612 assert(WILDCOPY_OVERLENGTH >= 16);
613 ZSTD_copy16(seqStorePtr->lit, literals);
614 if (litLength > 16) {
615 ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
616 }
617 } else {
618 ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
619 }
620 seqStorePtr->lit += litLength;
621
622 /* literal Length */
623 if (litLength>0xFFFF) {
624 assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
625 seqStorePtr->longLengthType = ZSTD_llt_literalLength;
626 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
627 }
628 seqStorePtr->sequences[0].litLength = (U16)litLength;
629
630 /* match offset */
631 seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
632
633 /* match Length */
634 assert(matchLength >= MINMATCH);
635 { size_t const mlBase = matchLength - MINMATCH;
636 if (mlBase>0xFFFF) {
637 assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
638 seqStorePtr->longLengthType = ZSTD_llt_matchLength;
639 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
640 }
641 seqStorePtr->sequences[0].mlBase = (U16)mlBase;
642 }
643
644 seqStorePtr->sequences++;
645 }
646
647 /* ZSTD_updateRep() :
648 * updates in-place @rep (array of repeat offsets)
649 * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
650 */
651 MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM],U32 const offBase_minus1,U32 const ll0)652 ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
653 {
654 if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */
655 rep[2] = rep[1];
656 rep[1] = rep[0];
657 rep[0] = STORED_OFFSET(offBase_minus1);
658 } else { /* repcode */
659 U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
660 if (repCode > 0) { /* note : if repCode==0, no change */
661 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
662 rep[2] = (repCode >= 2) ? rep[1] : rep[2];
663 rep[1] = rep[0];
664 rep[0] = currentOffset;
665 } else { /* repCode == 0 */
666 /* nothing to do */
667 }
668 }
669 }
670
671 typedef struct repcodes_s {
672 U32 rep[3];
673 } repcodes_t;
674
675 MEM_STATIC repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM],U32 const offBase_minus1,U32 const ll0)676 ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
677 {
678 repcodes_t newReps;
679 ZSTD_memcpy(&newReps, rep, sizeof(newReps));
680 ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
681 return newReps;
682 }
683
684
685 /*-*************************************
686 * Match length counter
687 ***************************************/
ZSTD_NbCommonBytes(size_t val)688 static unsigned ZSTD_NbCommonBytes (size_t val)
689 {
690 if (MEM_isLittleEndian()) {
691 if (MEM_64bits()) {
692 # if (__GNUC__ >= 4)
693 return (__builtin_ctzll((U64)val) >> 3);
694 # else
695 static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
696 0, 3, 1, 3, 1, 4, 2, 7,
697 0, 2, 3, 6, 1, 5, 3, 5,
698 1, 3, 4, 4, 2, 5, 6, 7,
699 7, 0, 1, 2, 3, 3, 4, 6,
700 2, 6, 5, 5, 3, 4, 5, 6,
701 7, 1, 2, 4, 6, 4, 4, 5,
702 7, 2, 6, 5, 7, 6, 7, 7 };
703 return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
704 # endif
705 } else { /* 32 bits */
706 # if (__GNUC__ >= 3)
707 return (__builtin_ctz((U32)val) >> 3);
708 # else
709 static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
710 3, 2, 2, 1, 3, 2, 0, 1,
711 3, 3, 1, 2, 2, 2, 2, 0,
712 3, 1, 2, 0, 1, 0, 1, 1 };
713 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
714 # endif
715 }
716 } else { /* Big Endian CPU */
717 if (MEM_64bits()) {
718 # if (__GNUC__ >= 4)
719 return (__builtin_clzll(val) >> 3);
720 # else
721 unsigned r;
722 const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
723 if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
724 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
725 r += (!val);
726 return r;
727 # endif
728 } else { /* 32 bits */
729 # if (__GNUC__ >= 3)
730 return (__builtin_clz((U32)val) >> 3);
731 # else
732 unsigned r;
733 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
734 r += (!val);
735 return r;
736 # endif
737 } }
738 }
739
740
ZSTD_count(const BYTE * pIn,const BYTE * pMatch,const BYTE * const pInLimit)741 MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
742 {
743 const BYTE* const pStart = pIn;
744 const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
745
746 if (pIn < pInLoopLimit) {
747 { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
748 if (diff) return ZSTD_NbCommonBytes(diff); }
749 pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
750 while (pIn < pInLoopLimit) {
751 size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
752 if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
753 pIn += ZSTD_NbCommonBytes(diff);
754 return (size_t)(pIn - pStart);
755 } }
756 if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
757 if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
758 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
759 return (size_t)(pIn - pStart);
760 }
761
762 /* ZSTD_count_2segments() :
763 * can count match length with `ip` & `match` in 2 different segments.
764 * convention : on reaching mEnd, match count continue starting from iStart
765 */
766 MEM_STATIC size_t
ZSTD_count_2segments(const BYTE * ip,const BYTE * match,const BYTE * iEnd,const BYTE * mEnd,const BYTE * iStart)767 ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
768 const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
769 {
770 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
771 size_t const matchLength = ZSTD_count(ip, match, vEnd);
772 if (match + matchLength != mEnd) return matchLength;
773 DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
774 DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
775 DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
776 DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
777 DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
778 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
779 }
780
781
782 /*-*************************************
783 * Hashes
784 ***************************************/
785 static const U32 prime3bytes = 506832829U;
ZSTD_hash3(U32 u,U32 h)786 static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
ZSTD_hash3Ptr(const void * ptr,U32 h)787 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
788
789 static const U32 prime4bytes = 2654435761U;
ZSTD_hash4(U32 u,U32 h)790 static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
ZSTD_hash4Ptr(const void * ptr,U32 h)791 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
792
793 static const U64 prime5bytes = 889523592379ULL;
ZSTD_hash5(U64 u,U32 h)794 static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
ZSTD_hash5Ptr(const void * p,U32 h)795 static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
796
797 static const U64 prime6bytes = 227718039650203ULL;
ZSTD_hash6(U64 u,U32 h)798 static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
ZSTD_hash6Ptr(const void * p,U32 h)799 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
800
801 static const U64 prime7bytes = 58295818150454627ULL;
ZSTD_hash7(U64 u,U32 h)802 static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
ZSTD_hash7Ptr(const void * p,U32 h)803 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
804
805 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
ZSTD_hash8(U64 u,U32 h)806 static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
ZSTD_hash8Ptr(const void * p,U32 h)807 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
808
809 MEM_STATIC FORCE_INLINE_ATTR
ZSTD_hashPtr(const void * p,U32 hBits,U32 mls)810 size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
811 {
812 switch(mls)
813 {
814 default:
815 case 4: return ZSTD_hash4Ptr(p, hBits);
816 case 5: return ZSTD_hash5Ptr(p, hBits);
817 case 6: return ZSTD_hash6Ptr(p, hBits);
818 case 7: return ZSTD_hash7Ptr(p, hBits);
819 case 8: return ZSTD_hash8Ptr(p, hBits);
820 }
821 }
822
823 /* ZSTD_ipow() :
824 * Return base^exponent.
825 */
ZSTD_ipow(U64 base,U64 exponent)826 static U64 ZSTD_ipow(U64 base, U64 exponent)
827 {
828 U64 power = 1;
829 while (exponent) {
830 if (exponent & 1) power *= base;
831 exponent >>= 1;
832 base *= base;
833 }
834 return power;
835 }
836
837 #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
838
839 /* ZSTD_rollingHash_append() :
840 * Add the buffer to the hash value.
841 */
ZSTD_rollingHash_append(U64 hash,void const * buf,size_t size)842 static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
843 {
844 BYTE const* istart = (BYTE const*)buf;
845 size_t pos;
846 for (pos = 0; pos < size; ++pos) {
847 hash *= prime8bytes;
848 hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
849 }
850 return hash;
851 }
852
853 /* ZSTD_rollingHash_compute() :
854 * Compute the rolling hash value of the buffer.
855 */
ZSTD_rollingHash_compute(void const * buf,size_t size)856 MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
857 {
858 return ZSTD_rollingHash_append(0, buf, size);
859 }
860
861 /* ZSTD_rollingHash_primePower() :
862 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
863 * over a window of length bytes.
864 */
ZSTD_rollingHash_primePower(U32 length)865 MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
866 {
867 return ZSTD_ipow(prime8bytes, length - 1);
868 }
869
870 /* ZSTD_rollingHash_rotate() :
871 * Rotate the rolling hash by one byte.
872 */
ZSTD_rollingHash_rotate(U64 hash,BYTE toRemove,BYTE toAdd,U64 primePower)873 MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
874 {
875 hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
876 hash *= prime8bytes;
877 hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
878 return hash;
879 }
880
881 /*-*************************************
882 * Round buffer management
883 ***************************************/
884 #if (ZSTD_WINDOWLOG_MAX_64 > 31)
885 # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
886 #endif
887 /* Max current allowed */
888 #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
889 /* Maximum chunk size before overflow correction needs to be called again */
890 #define ZSTD_CHUNKSIZE_MAX \
891 ( ((U32)-1) /* Maximum ending current index */ \
892 - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */
893
894 /*
895 * ZSTD_window_clear():
896 * Clears the window containing the history by simply setting it to empty.
897 */
ZSTD_window_clear(ZSTD_window_t * window)898 MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
899 {
900 size_t const endT = (size_t)(window->nextSrc - window->base);
901 U32 const end = (U32)endT;
902
903 window->lowLimit = end;
904 window->dictLimit = end;
905 }
906
ZSTD_window_isEmpty(ZSTD_window_t const window)907 MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
908 {
909 return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
910 window.lowLimit == ZSTD_WINDOW_START_INDEX &&
911 (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
912 }
913
914 /*
915 * ZSTD_window_hasExtDict():
916 * Returns non-zero if the window has a non-empty extDict.
917 */
ZSTD_window_hasExtDict(ZSTD_window_t const window)918 MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
919 {
920 return window.lowLimit < window.dictLimit;
921 }
922
923 /*
924 * ZSTD_matchState_dictMode():
925 * Inspects the provided matchState and figures out what dictMode should be
926 * passed to the compressor.
927 */
ZSTD_matchState_dictMode(const ZSTD_matchState_t * ms)928 MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
929 {
930 return ZSTD_window_hasExtDict(ms->window) ?
931 ZSTD_extDict :
932 ms->dictMatchState != NULL ?
933 (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
934 ZSTD_noDict;
935 }
936
937 /* Defining this macro to non-zero tells zstd to run the overflow correction
938 * code much more frequently. This is very inefficient, and should only be
939 * used for tests and fuzzers.
940 */
941 #ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
942 # ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
943 # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
944 # else
945 # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
946 # endif
947 #endif
948
949 /*
950 * ZSTD_window_canOverflowCorrect():
951 * Returns non-zero if the indices are large enough for overflow correction
952 * to work correctly without impacting compression ratio.
953 */
ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,U32 cycleLog,U32 maxDist,U32 loadedDictEnd,void const * src)954 MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
955 U32 cycleLog,
956 U32 maxDist,
957 U32 loadedDictEnd,
958 void const* src)
959 {
960 U32 const cycleSize = 1u << cycleLog;
961 U32 const curr = (U32)((BYTE const*)src - window.base);
962 U32 const minIndexToOverflowCorrect = cycleSize
963 + MAX(maxDist, cycleSize)
964 + ZSTD_WINDOW_START_INDEX;
965
966 /* Adjust the min index to backoff the overflow correction frequency,
967 * so we don't waste too much CPU in overflow correction. If this
968 * computation overflows we don't really care, we just need to make
969 * sure it is at least minIndexToOverflowCorrect.
970 */
971 U32 const adjustment = window.nbOverflowCorrections + 1;
972 U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
973 minIndexToOverflowCorrect);
974 U32 const indexLargeEnough = curr > adjustedIndex;
975
976 /* Only overflow correct early if the dictionary is invalidated already,
977 * so we don't hurt compression ratio.
978 */
979 U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
980
981 return indexLargeEnough && dictionaryInvalidated;
982 }
983
984 /*
985 * ZSTD_window_needOverflowCorrection():
986 * Returns non-zero if the indices are getting too large and need overflow
987 * protection.
988 */
ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,U32 cycleLog,U32 maxDist,U32 loadedDictEnd,void const * src,void const * srcEnd)989 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
990 U32 cycleLog,
991 U32 maxDist,
992 U32 loadedDictEnd,
993 void const* src,
994 void const* srcEnd)
995 {
996 U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
997 if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
998 if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
999 return 1;
1000 }
1001 }
1002 return curr > ZSTD_CURRENT_MAX;
1003 }
1004
1005 /*
1006 * ZSTD_window_correctOverflow():
1007 * Reduces the indices to protect from index overflow.
1008 * Returns the correction made to the indices, which must be applied to every
1009 * stored index.
1010 *
1011 * The least significant cycleLog bits of the indices must remain the same,
1012 * which may be 0. Every index up to maxDist in the past must be valid.
1013 */
ZSTD_window_correctOverflow(ZSTD_window_t * window,U32 cycleLog,U32 maxDist,void const * src)1014 MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
1015 U32 maxDist, void const* src)
1016 {
1017 /* preemptive overflow correction:
1018 * 1. correction is large enough:
1019 * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
1020 * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
1021 *
1022 * current - newCurrent
1023 * > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
1024 * > (3<<29) - (1<<chainLog)
1025 * > (3<<29) - (1<<30) (NOTE: chainLog <= 30)
1026 * > 1<<29
1027 *
1028 * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
1029 * After correction, current is less than (1<<chainLog + 1<<windowLog).
1030 * In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
1031 * In 32-bit mode we are safe, because (chainLog <= 29), so
1032 * ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
1033 * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
1034 * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
1035 */
1036 U32 const cycleSize = 1u << cycleLog;
1037 U32 const cycleMask = cycleSize - 1;
1038 U32 const curr = (U32)((BYTE const*)src - window->base);
1039 U32 const currentCycle = curr & cycleMask;
1040 /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
1041 U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
1042 ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
1043 : 0;
1044 U32 const newCurrent = currentCycle
1045 + currentCycleCorrection
1046 + MAX(maxDist, cycleSize);
1047 U32 const correction = curr - newCurrent;
1048 /* maxDist must be a power of two so that:
1049 * (newCurrent & cycleMask) == (curr & cycleMask)
1050 * This is required to not corrupt the chains / binary tree.
1051 */
1052 assert((maxDist & (maxDist - 1)) == 0);
1053 assert((curr & cycleMask) == (newCurrent & cycleMask));
1054 assert(curr > newCurrent);
1055 if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1056 /* Loose bound, should be around 1<<29 (see above) */
1057 assert(correction > 1<<28);
1058 }
1059
1060 window->base += correction;
1061 window->dictBase += correction;
1062 if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
1063 window->lowLimit = ZSTD_WINDOW_START_INDEX;
1064 } else {
1065 window->lowLimit -= correction;
1066 }
1067 if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
1068 window->dictLimit = ZSTD_WINDOW_START_INDEX;
1069 } else {
1070 window->dictLimit -= correction;
1071 }
1072
1073 /* Ensure we can still reference the full window. */
1074 assert(newCurrent >= maxDist);
1075 assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
1076 /* Ensure that lowLimit and dictLimit didn't underflow. */
1077 assert(window->lowLimit <= newCurrent);
1078 assert(window->dictLimit <= newCurrent);
1079
1080 ++window->nbOverflowCorrections;
1081
1082 DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
1083 window->lowLimit);
1084 return correction;
1085 }
1086
1087 /*
1088 * ZSTD_window_enforceMaxDist():
1089 * Updates lowLimit so that:
1090 * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
1091 *
1092 * It ensures index is valid as long as index >= lowLimit.
1093 * This must be called before a block compression call.
1094 *
1095 * loadedDictEnd is only defined if a dictionary is in use for current compression.
1096 * As the name implies, loadedDictEnd represents the index at end of dictionary.
1097 * The value lies within context's referential, it can be directly compared to blockEndIdx.
1098 *
1099 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
1100 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
1101 * This is because dictionaries are allowed to be referenced fully
1102 * as long as the last byte of the dictionary is in the window.
1103 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
1104 *
1105 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
1106 * In dictMatchState mode, lowLimit and dictLimit are the same,
1107 * and the dictionary is below them.
1108 * forceWindow and dictMatchState are therefore incompatible.
1109 */
1110 MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t * window,const void * blockEnd,U32 maxDist,U32 * loadedDictEndPtr,const ZSTD_matchState_t ** dictMatchStatePtr)1111 ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
1112 const void* blockEnd,
1113 U32 maxDist,
1114 U32* loadedDictEndPtr,
1115 const ZSTD_matchState_t** dictMatchStatePtr)
1116 {
1117 U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1118 U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
1119 DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1120 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1121
1122 /* - When there is no dictionary : loadedDictEnd == 0.
1123 In which case, the test (blockEndIdx > maxDist) is merely to avoid
1124 overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
1125 - When there is a standard dictionary :
1126 Index referential is copied from the dictionary,
1127 which means it starts from 0.
1128 In which case, loadedDictEnd == dictSize,
1129 and it makes sense to compare `blockEndIdx > maxDist + dictSize`
1130 since `blockEndIdx` also starts from zero.
1131 - When there is an attached dictionary :
1132 loadedDictEnd is expressed within the referential of the context,
1133 so it can be directly compared against blockEndIdx.
1134 */
1135 if (blockEndIdx > maxDist + loadedDictEnd) {
1136 U32 const newLowLimit = blockEndIdx - maxDist;
1137 if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
1138 if (window->dictLimit < window->lowLimit) {
1139 DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
1140 (unsigned)window->dictLimit, (unsigned)window->lowLimit);
1141 window->dictLimit = window->lowLimit;
1142 }
1143 /* On reaching window size, dictionaries are invalidated */
1144 if (loadedDictEndPtr) *loadedDictEndPtr = 0;
1145 if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
1146 }
1147 }
1148
1149 /* Similar to ZSTD_window_enforceMaxDist(),
1150 * but only invalidates dictionary
1151 * when input progresses beyond window size.
1152 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
1153 * loadedDictEnd uses same referential as window->base
1154 * maxDist is the window size */
1155 MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t * window,const void * blockEnd,U32 maxDist,U32 * loadedDictEndPtr,const ZSTD_matchState_t ** dictMatchStatePtr)1156 ZSTD_checkDictValidity(const ZSTD_window_t* window,
1157 const void* blockEnd,
1158 U32 maxDist,
1159 U32* loadedDictEndPtr,
1160 const ZSTD_matchState_t** dictMatchStatePtr)
1161 {
1162 assert(loadedDictEndPtr != NULL);
1163 assert(dictMatchStatePtr != NULL);
1164 { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1165 U32 const loadedDictEnd = *loadedDictEndPtr;
1166 DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1167 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1168 assert(blockEndIdx >= loadedDictEnd);
1169
1170 if (blockEndIdx > loadedDictEnd + maxDist) {
1171 /* On reaching window size, dictionaries are invalidated.
1172 * For simplification, if window size is reached anywhere within next block,
1173 * the dictionary is invalidated for the full block.
1174 */
1175 DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
1176 *loadedDictEndPtr = 0;
1177 *dictMatchStatePtr = NULL;
1178 } else {
1179 if (*loadedDictEndPtr != 0) {
1180 DEBUGLOG(6, "dictionary considered valid for current block");
1181 } } }
1182 }
1183
ZSTD_window_init(ZSTD_window_t * window)1184 MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
1185 ZSTD_memset(window, 0, sizeof(*window));
1186 window->base = (BYTE const*)" ";
1187 window->dictBase = (BYTE const*)" ";
1188 ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
1189 window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */
1190 window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */
1191 window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */
1192 window->nbOverflowCorrections = 0;
1193 }
1194
1195 /*
1196 * ZSTD_window_update():
1197 * Updates the window by appending [src, src + srcSize) to the window.
1198 * If it is not contiguous, the current prefix becomes the extDict, and we
1199 * forget about the extDict. Handles overlap of the prefix and extDict.
1200 * Returns non-zero if the segment is contiguous.
1201 */
ZSTD_window_update(ZSTD_window_t * window,void const * src,size_t srcSize,int forceNonContiguous)1202 MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
1203 void const* src, size_t srcSize,
1204 int forceNonContiguous)
1205 {
1206 BYTE const* const ip = (BYTE const*)src;
1207 U32 contiguous = 1;
1208 DEBUGLOG(5, "ZSTD_window_update");
1209 if (srcSize == 0)
1210 return contiguous;
1211 assert(window->base != NULL);
1212 assert(window->dictBase != NULL);
1213 /* Check if blocks follow each other */
1214 if (src != window->nextSrc || forceNonContiguous) {
1215 /* not contiguous */
1216 size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
1217 DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
1218 window->lowLimit = window->dictLimit;
1219 assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
1220 window->dictLimit = (U32)distanceFromBase;
1221 window->dictBase = window->base;
1222 window->base = ip - distanceFromBase;
1223 /* ms->nextToUpdate = window->dictLimit; */
1224 if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
1225 contiguous = 0;
1226 }
1227 window->nextSrc = ip + srcSize;
1228 /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
1229 if ( (ip+srcSize > window->dictBase + window->lowLimit)
1230 & (ip < window->dictBase + window->dictLimit)) {
1231 ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
1232 U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
1233 window->lowLimit = lowLimitMax;
1234 DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
1235 }
1236 return contiguous;
1237 }
1238
1239 /*
1240 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
1241 */
ZSTD_getLowestMatchIndex(const ZSTD_matchState_t * ms,U32 curr,unsigned windowLog)1242 MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
1243 {
1244 U32 const maxDistance = 1U << windowLog;
1245 U32 const lowestValid = ms->window.lowLimit;
1246 U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1247 U32 const isDictionary = (ms->loadedDictEnd != 0);
1248 /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
1249 * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
1250 * valid for the entire block. So this check is sufficient to find the lowest valid match index.
1251 */
1252 U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1253 return matchLowest;
1254 }
1255
1256 /*
1257 * Returns the lowest allowed match index in the prefix.
1258 */
ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t * ms,U32 curr,unsigned windowLog)1259 MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
1260 {
1261 U32 const maxDistance = 1U << windowLog;
1262 U32 const lowestValid = ms->window.dictLimit;
1263 U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1264 U32 const isDictionary = (ms->loadedDictEnd != 0);
1265 /* When computing the lowest prefix index we need to take the dictionary into account to handle
1266 * the edge case where the dictionary and the source are contiguous in memory.
1267 */
1268 U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1269 return matchLowest;
1270 }
1271
1272
1273
1274 /* debug functions */
1275 #if (DEBUGLEVEL>=2)
1276
ZSTD_fWeight(U32 rawStat)1277 MEM_STATIC double ZSTD_fWeight(U32 rawStat)
1278 {
1279 U32 const fp_accuracy = 8;
1280 U32 const fp_multiplier = (1 << fp_accuracy);
1281 U32 const newStat = rawStat + 1;
1282 U32 const hb = ZSTD_highbit32(newStat);
1283 U32 const BWeight = hb * fp_multiplier;
1284 U32 const FWeight = (newStat << fp_accuracy) >> hb;
1285 U32 const weight = BWeight + FWeight;
1286 assert(hb + fp_accuracy < 31);
1287 return (double)weight / fp_multiplier;
1288 }
1289
1290 /* display a table content,
1291 * listing each element, its frequency, and its predicted bit cost */
ZSTD_debugTable(const U32 * table,U32 max)1292 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
1293 {
1294 unsigned u, sum;
1295 for (u=0, sum=0; u<=max; u++) sum += table[u];
1296 DEBUGLOG(2, "total nb elts: %u", sum);
1297 for (u=0; u<=max; u++) {
1298 DEBUGLOG(2, "%2u: %5u (%.2f)",
1299 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
1300 }
1301 }
1302
1303 #endif
1304
1305
1306
1307 /* ===============================================================
1308 * Shared internal declarations
1309 * These prototypes may be called from sources not in lib/compress
1310 * =============================================================== */
1311
1312 /* ZSTD_loadCEntropy() :
1313 * dict : must point at beginning of a valid zstd dictionary.
1314 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
1315 * assumptions : magic number supposed already checked
1316 * and dictSize >= 8 */
1317 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
1318 const void* const dict, size_t dictSize);
1319
1320 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
1321
1322 /* ==============================================================
1323 * Private declarations
1324 * These prototypes shall only be called from within lib/compress
1325 * ============================================================== */
1326
1327 /* ZSTD_getCParamsFromCCtxParams() :
1328 * cParams are built depending on compressionLevel, src size hints,
1329 * LDM and manually set compression parameters.
1330 * Note: srcSizeHint == 0 means 0!
1331 */
1332 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
1333 const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
1334
1335 /*! ZSTD_initCStream_internal() :
1336 * Private use only. Init streaming operation.
1337 * expects params to be valid.
1338 * must receive dict, or cdict, or none, but not both.
1339 * @return : 0, or an error code */
1340 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
1341 const void* dict, size_t dictSize,
1342 const ZSTD_CDict* cdict,
1343 const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
1344
1345 void ZSTD_resetSeqStore(seqStore_t* ssPtr);
1346
1347 /*! ZSTD_getCParamsFromCDict() :
1348 * as the name implies */
1349 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
1350
1351 /* ZSTD_compressBegin_advanced_internal() :
1352 * Private use only. To be called from zstdmt_compress.c. */
1353 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
1354 const void* dict, size_t dictSize,
1355 ZSTD_dictContentType_e dictContentType,
1356 ZSTD_dictTableLoadMethod_e dtlm,
1357 const ZSTD_CDict* cdict,
1358 const ZSTD_CCtx_params* params,
1359 unsigned long long pledgedSrcSize);
1360
1361 /* ZSTD_compress_advanced_internal() :
1362 * Private use only. To be called from zstdmt_compress.c. */
1363 size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
1364 void* dst, size_t dstCapacity,
1365 const void* src, size_t srcSize,
1366 const void* dict,size_t dictSize,
1367 const ZSTD_CCtx_params* params);
1368
1369
1370 /* ZSTD_writeLastEmptyBlock() :
1371 * output an empty Block with end-of-frame mark to complete a frame
1372 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
1373 * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
1374 */
1375 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
1376
1377
1378 /* ZSTD_referenceExternalSequences() :
1379 * Must be called before starting a compression operation.
1380 * seqs must parse a prefix of the source.
1381 * This cannot be used when long range matching is enabled.
1382 * Zstd will use these sequences, and pass the literals to a secondary block
1383 * compressor.
1384 * @return : An error code on failure.
1385 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
1386 * access and data corruption.
1387 */
1388 size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
1389
1390 /* ZSTD_cycleLog() :
1391 * condition for correct operation : hashLog > 1 */
1392 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
1393
1394 /* ZSTD_CCtx_trace() :
1395 * Trace the end of a compression call.
1396 */
1397 void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
1398
1399 #endif /* ZSTD_COMPRESS_H */
1400