1e0c1b49fSNick Terrell /*
2e0c1b49fSNick Terrell  * Copyright (c) Yann Collet, Facebook, Inc.
3e0c1b49fSNick Terrell  * All rights reserved.
4e0c1b49fSNick Terrell  *
5e0c1b49fSNick Terrell  * This source code is licensed under both the BSD-style license (found in the
6e0c1b49fSNick Terrell  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7e0c1b49fSNick Terrell  * in the COPYING file in the root directory of this source tree).
8e0c1b49fSNick Terrell  * You may select, at your option, one of the above-listed licenses.
9e0c1b49fSNick Terrell  */
10e0c1b49fSNick Terrell 
11e0c1b49fSNick Terrell /* zstd_decompress_block :
12e0c1b49fSNick Terrell  * this module takes care of decompressing _compressed_ block */
13e0c1b49fSNick Terrell 
14e0c1b49fSNick Terrell /*-*******************************************************
15e0c1b49fSNick Terrell *  Dependencies
16e0c1b49fSNick Terrell *********************************************************/
17e0c1b49fSNick Terrell #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18e0c1b49fSNick Terrell #include "../common/compiler.h"    /* prefetch */
19e0c1b49fSNick Terrell #include "../common/cpu.h"         /* bmi2 */
20e0c1b49fSNick Terrell #include "../common/mem.h"         /* low level memory routines */
21e0c1b49fSNick Terrell #define FSE_STATIC_LINKING_ONLY
22e0c1b49fSNick Terrell #include "../common/fse.h"
23e0c1b49fSNick Terrell #define HUF_STATIC_LINKING_ONLY
24e0c1b49fSNick Terrell #include "../common/huf.h"
25e0c1b49fSNick Terrell #include "../common/zstd_internal.h"
26e0c1b49fSNick Terrell #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
27e0c1b49fSNick Terrell #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
28e0c1b49fSNick Terrell #include "zstd_decompress_block.h"
29e0c1b49fSNick Terrell 
30e0c1b49fSNick Terrell /*_*******************************************************
31e0c1b49fSNick Terrell *  Macros
32e0c1b49fSNick Terrell **********************************************************/
33e0c1b49fSNick Terrell 
34e0c1b49fSNick Terrell /* These two optional macros force the use one way or another of the two
35e0c1b49fSNick Terrell  * ZSTD_decompressSequences implementations. You can't force in both directions
36e0c1b49fSNick Terrell  * at the same time.
37e0c1b49fSNick Terrell  */
38e0c1b49fSNick Terrell #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39e0c1b49fSNick Terrell     defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40e0c1b49fSNick Terrell #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
41e0c1b49fSNick Terrell #endif
42e0c1b49fSNick Terrell 
43e0c1b49fSNick Terrell 
44e0c1b49fSNick Terrell /*_*******************************************************
45e0c1b49fSNick Terrell *  Memory operations
46e0c1b49fSNick Terrell **********************************************************/
ZSTD_copy4(void * dst,const void * src)47e0c1b49fSNick Terrell static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
48e0c1b49fSNick Terrell 
49e0c1b49fSNick Terrell 
50e0c1b49fSNick Terrell /*-*************************************************************
51e0c1b49fSNick Terrell  *   Block decoding
52e0c1b49fSNick Terrell  ***************************************************************/
53e0c1b49fSNick Terrell 
54e0c1b49fSNick Terrell /*! ZSTD_getcBlockSize() :
55e0c1b49fSNick Terrell  *  Provides the size of compressed block from block header `src` */
ZSTD_getcBlockSize(const void * src,size_t srcSize,blockProperties_t * bpPtr)56e0c1b49fSNick Terrell size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57e0c1b49fSNick Terrell                           blockProperties_t* bpPtr)
58e0c1b49fSNick Terrell {
59e0c1b49fSNick Terrell     RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
60e0c1b49fSNick Terrell 
61e0c1b49fSNick Terrell     {   U32 const cBlockHeader = MEM_readLE24(src);
62e0c1b49fSNick Terrell         U32 const cSize = cBlockHeader >> 3;
63e0c1b49fSNick Terrell         bpPtr->lastBlock = cBlockHeader & 1;
64e0c1b49fSNick Terrell         bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
65e0c1b49fSNick Terrell         bpPtr->origSize = cSize;   /* only useful for RLE */
66e0c1b49fSNick Terrell         if (bpPtr->blockType == bt_rle) return 1;
67e0c1b49fSNick Terrell         RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
68e0c1b49fSNick Terrell         return cSize;
69e0c1b49fSNick Terrell     }
70e0c1b49fSNick Terrell }
71e0c1b49fSNick Terrell 
72*2aa14b1aSNick Terrell /* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
ZSTD_allocateLiteralsBuffer(ZSTD_DCtx * dctx,void * const dst,const size_t dstCapacity,const size_t litSize,const streaming_operation streaming,const size_t expectedWriteSize,const unsigned splitImmediately)73*2aa14b1aSNick Terrell static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
74*2aa14b1aSNick Terrell     const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
75*2aa14b1aSNick Terrell {
76*2aa14b1aSNick Terrell     if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
77*2aa14b1aSNick Terrell     {
78*2aa14b1aSNick Terrell         /* room for litbuffer to fit without read faulting */
79*2aa14b1aSNick Terrell         dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
80*2aa14b1aSNick Terrell         dctx->litBufferEnd = dctx->litBuffer + litSize;
81*2aa14b1aSNick Terrell         dctx->litBufferLocation = ZSTD_in_dst;
82*2aa14b1aSNick Terrell     }
83*2aa14b1aSNick Terrell     else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
84*2aa14b1aSNick Terrell     {
85*2aa14b1aSNick Terrell         /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
86*2aa14b1aSNick Terrell         if (splitImmediately) {
87*2aa14b1aSNick Terrell             /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
88*2aa14b1aSNick Terrell             dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
89*2aa14b1aSNick Terrell             dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
90*2aa14b1aSNick Terrell         }
91*2aa14b1aSNick Terrell         else {
92*2aa14b1aSNick Terrell             /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
93*2aa14b1aSNick Terrell             dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
94*2aa14b1aSNick Terrell             dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
95*2aa14b1aSNick Terrell         }
96*2aa14b1aSNick Terrell         dctx->litBufferLocation = ZSTD_split;
97*2aa14b1aSNick Terrell     }
98*2aa14b1aSNick Terrell     else
99*2aa14b1aSNick Terrell     {
100*2aa14b1aSNick Terrell         /* fits entirely within litExtraBuffer, so no split is necessary */
101*2aa14b1aSNick Terrell         dctx->litBuffer = dctx->litExtraBuffer;
102*2aa14b1aSNick Terrell         dctx->litBufferEnd = dctx->litBuffer + litSize;
103*2aa14b1aSNick Terrell         dctx->litBufferLocation = ZSTD_not_in_dst;
104*2aa14b1aSNick Terrell     }
105*2aa14b1aSNick Terrell }
106e0c1b49fSNick Terrell 
107e0c1b49fSNick Terrell /* Hidden declaration for fullbench */
108e0c1b49fSNick Terrell size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
109*2aa14b1aSNick Terrell                           const void* src, size_t srcSize,
110*2aa14b1aSNick Terrell                           void* dst, size_t dstCapacity, const streaming_operation streaming);
111e0c1b49fSNick Terrell /*! ZSTD_decodeLiteralsBlock() :
112*2aa14b1aSNick Terrell  * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
113*2aa14b1aSNick Terrell  * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
114*2aa14b1aSNick Terrell  * block will be output.  Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
115*2aa14b1aSNick Terrell  * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
116*2aa14b1aSNick Terrell  *
117e0c1b49fSNick Terrell  * @return : nb of bytes read from src (< srcSize )
118e0c1b49fSNick Terrell  *  note : symbol not declared but exposed for fullbench */
ZSTD_decodeLiteralsBlock(ZSTD_DCtx * dctx,const void * src,size_t srcSize,void * dst,size_t dstCapacity,const streaming_operation streaming)119e0c1b49fSNick Terrell size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
120*2aa14b1aSNick Terrell                           const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
121*2aa14b1aSNick Terrell                           void* dst, size_t dstCapacity, const streaming_operation streaming)
122e0c1b49fSNick Terrell {
123e0c1b49fSNick Terrell     DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
124e0c1b49fSNick Terrell     RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
125e0c1b49fSNick Terrell 
126e0c1b49fSNick Terrell     {   const BYTE* const istart = (const BYTE*) src;
127e0c1b49fSNick Terrell         symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
128e0c1b49fSNick Terrell 
129e0c1b49fSNick Terrell         switch(litEncType)
130e0c1b49fSNick Terrell         {
131e0c1b49fSNick Terrell         case set_repeat:
132e0c1b49fSNick Terrell             DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
133e0c1b49fSNick Terrell             RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
134e0c1b49fSNick Terrell             ZSTD_FALLTHROUGH;
135e0c1b49fSNick Terrell 
136e0c1b49fSNick Terrell         case set_compressed:
137e0c1b49fSNick Terrell             RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
138e0c1b49fSNick Terrell             {   size_t lhSize, litSize, litCSize;
139e0c1b49fSNick Terrell                 U32 singleStream=0;
140e0c1b49fSNick Terrell                 U32 const lhlCode = (istart[0] >> 2) & 3;
141e0c1b49fSNick Terrell                 U32 const lhc = MEM_readLE32(istart);
142e0c1b49fSNick Terrell                 size_t hufSuccess;
143*2aa14b1aSNick Terrell                 size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
144e0c1b49fSNick Terrell                 switch(lhlCode)
145e0c1b49fSNick Terrell                 {
146e0c1b49fSNick Terrell                 case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
147e0c1b49fSNick Terrell                     /* 2 - 2 - 10 - 10 */
148e0c1b49fSNick Terrell                     singleStream = !lhlCode;
149e0c1b49fSNick Terrell                     lhSize = 3;
150e0c1b49fSNick Terrell                     litSize  = (lhc >> 4) & 0x3FF;
151e0c1b49fSNick Terrell                     litCSize = (lhc >> 14) & 0x3FF;
152e0c1b49fSNick Terrell                     break;
153e0c1b49fSNick Terrell                 case 2:
154e0c1b49fSNick Terrell                     /* 2 - 2 - 14 - 14 */
155e0c1b49fSNick Terrell                     lhSize = 4;
156e0c1b49fSNick Terrell                     litSize  = (lhc >> 4) & 0x3FFF;
157e0c1b49fSNick Terrell                     litCSize = lhc >> 18;
158e0c1b49fSNick Terrell                     break;
159e0c1b49fSNick Terrell                 case 3:
160e0c1b49fSNick Terrell                     /* 2 - 2 - 18 - 18 */
161e0c1b49fSNick Terrell                     lhSize = 5;
162e0c1b49fSNick Terrell                     litSize  = (lhc >> 4) & 0x3FFFF;
163e0c1b49fSNick Terrell                     litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
164e0c1b49fSNick Terrell                     break;
165e0c1b49fSNick Terrell                 }
166*2aa14b1aSNick Terrell                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
167e0c1b49fSNick Terrell                 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
168e0c1b49fSNick Terrell                 RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
169*2aa14b1aSNick Terrell                 RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
170*2aa14b1aSNick Terrell                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
171e0c1b49fSNick Terrell 
172e0c1b49fSNick Terrell                 /* prefetch huffman table if cold */
173e0c1b49fSNick Terrell                 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
174e0c1b49fSNick Terrell                     PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
175e0c1b49fSNick Terrell                 }
176e0c1b49fSNick Terrell 
177e0c1b49fSNick Terrell                 if (litEncType==set_repeat) {
178e0c1b49fSNick Terrell                     if (singleStream) {
179e0c1b49fSNick Terrell                         hufSuccess = HUF_decompress1X_usingDTable_bmi2(
180e0c1b49fSNick Terrell                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
181*2aa14b1aSNick Terrell                             dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
182e0c1b49fSNick Terrell                     } else {
183e0c1b49fSNick Terrell                         hufSuccess = HUF_decompress4X_usingDTable_bmi2(
184e0c1b49fSNick Terrell                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
185*2aa14b1aSNick Terrell                             dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
186e0c1b49fSNick Terrell                     }
187e0c1b49fSNick Terrell                 } else {
188e0c1b49fSNick Terrell                     if (singleStream) {
189e0c1b49fSNick Terrell #if defined(HUF_FORCE_DECOMPRESS_X2)
190e0c1b49fSNick Terrell                         hufSuccess = HUF_decompress1X_DCtx_wksp(
191e0c1b49fSNick Terrell                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
192e0c1b49fSNick Terrell                             istart+lhSize, litCSize, dctx->workspace,
193e0c1b49fSNick Terrell                             sizeof(dctx->workspace));
194e0c1b49fSNick Terrell #else
195e0c1b49fSNick Terrell                         hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
196e0c1b49fSNick Terrell                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
197e0c1b49fSNick Terrell                             istart+lhSize, litCSize, dctx->workspace,
198*2aa14b1aSNick Terrell                             sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
199e0c1b49fSNick Terrell #endif
200e0c1b49fSNick Terrell                     } else {
201e0c1b49fSNick Terrell                         hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
202e0c1b49fSNick Terrell                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
203e0c1b49fSNick Terrell                             istart+lhSize, litCSize, dctx->workspace,
204*2aa14b1aSNick Terrell                             sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
205e0c1b49fSNick Terrell                     }
206e0c1b49fSNick Terrell                 }
207*2aa14b1aSNick Terrell                 if (dctx->litBufferLocation == ZSTD_split)
208*2aa14b1aSNick Terrell                 {
209*2aa14b1aSNick Terrell                     ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
210*2aa14b1aSNick Terrell                     ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
211*2aa14b1aSNick Terrell                     dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
212*2aa14b1aSNick Terrell                     dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
213*2aa14b1aSNick Terrell                 }
214e0c1b49fSNick Terrell 
215e0c1b49fSNick Terrell                 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
216e0c1b49fSNick Terrell 
217e0c1b49fSNick Terrell                 dctx->litPtr = dctx->litBuffer;
218e0c1b49fSNick Terrell                 dctx->litSize = litSize;
219e0c1b49fSNick Terrell                 dctx->litEntropy = 1;
220e0c1b49fSNick Terrell                 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
221e0c1b49fSNick Terrell                 return litCSize + lhSize;
222e0c1b49fSNick Terrell             }
223e0c1b49fSNick Terrell 
224e0c1b49fSNick Terrell         case set_basic:
225e0c1b49fSNick Terrell             {   size_t litSize, lhSize;
226e0c1b49fSNick Terrell                 U32 const lhlCode = ((istart[0]) >> 2) & 3;
227*2aa14b1aSNick Terrell                 size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
228e0c1b49fSNick Terrell                 switch(lhlCode)
229e0c1b49fSNick Terrell                 {
230e0c1b49fSNick Terrell                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
231e0c1b49fSNick Terrell                     lhSize = 1;
232e0c1b49fSNick Terrell                     litSize = istart[0] >> 3;
233e0c1b49fSNick Terrell                     break;
234e0c1b49fSNick Terrell                 case 1:
235e0c1b49fSNick Terrell                     lhSize = 2;
236e0c1b49fSNick Terrell                     litSize = MEM_readLE16(istart) >> 4;
237e0c1b49fSNick Terrell                     break;
238e0c1b49fSNick Terrell                 case 3:
239e0c1b49fSNick Terrell                     lhSize = 3;
240e0c1b49fSNick Terrell                     litSize = MEM_readLE24(istart) >> 4;
241e0c1b49fSNick Terrell                     break;
242e0c1b49fSNick Terrell                 }
243e0c1b49fSNick Terrell 
244*2aa14b1aSNick Terrell                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
245*2aa14b1aSNick Terrell                 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
246*2aa14b1aSNick Terrell                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
247e0c1b49fSNick Terrell                 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
248e0c1b49fSNick Terrell                     RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
249*2aa14b1aSNick Terrell                     if (dctx->litBufferLocation == ZSTD_split)
250*2aa14b1aSNick Terrell                     {
251*2aa14b1aSNick Terrell                         ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
252*2aa14b1aSNick Terrell                         ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
253*2aa14b1aSNick Terrell                     }
254*2aa14b1aSNick Terrell                     else
255*2aa14b1aSNick Terrell                     {
256e0c1b49fSNick Terrell                         ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
257*2aa14b1aSNick Terrell                     }
258e0c1b49fSNick Terrell                     dctx->litPtr = dctx->litBuffer;
259e0c1b49fSNick Terrell                     dctx->litSize = litSize;
260e0c1b49fSNick Terrell                     return lhSize+litSize;
261e0c1b49fSNick Terrell                 }
262e0c1b49fSNick Terrell                 /* direct reference into compressed stream */
263e0c1b49fSNick Terrell                 dctx->litPtr = istart+lhSize;
264e0c1b49fSNick Terrell                 dctx->litSize = litSize;
265*2aa14b1aSNick Terrell                 dctx->litBufferEnd = dctx->litPtr + litSize;
266*2aa14b1aSNick Terrell                 dctx->litBufferLocation = ZSTD_not_in_dst;
267e0c1b49fSNick Terrell                 return lhSize+litSize;
268e0c1b49fSNick Terrell             }
269e0c1b49fSNick Terrell 
270e0c1b49fSNick Terrell         case set_rle:
271e0c1b49fSNick Terrell             {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
272e0c1b49fSNick Terrell                 size_t litSize, lhSize;
273*2aa14b1aSNick Terrell                 size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
274e0c1b49fSNick Terrell                 switch(lhlCode)
275e0c1b49fSNick Terrell                 {
276e0c1b49fSNick Terrell                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
277e0c1b49fSNick Terrell                     lhSize = 1;
278e0c1b49fSNick Terrell                     litSize = istart[0] >> 3;
279e0c1b49fSNick Terrell                     break;
280e0c1b49fSNick Terrell                 case 1:
281e0c1b49fSNick Terrell                     lhSize = 2;
282e0c1b49fSNick Terrell                     litSize = MEM_readLE16(istart) >> 4;
283e0c1b49fSNick Terrell                     break;
284e0c1b49fSNick Terrell                 case 3:
285e0c1b49fSNick Terrell                     lhSize = 3;
286e0c1b49fSNick Terrell                     litSize = MEM_readLE24(istart) >> 4;
287e0c1b49fSNick Terrell                     RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
288e0c1b49fSNick Terrell                     break;
289e0c1b49fSNick Terrell                 }
290*2aa14b1aSNick Terrell                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
291e0c1b49fSNick Terrell                 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
292*2aa14b1aSNick Terrell                 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
293*2aa14b1aSNick Terrell                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
294*2aa14b1aSNick Terrell                 if (dctx->litBufferLocation == ZSTD_split)
295*2aa14b1aSNick Terrell                 {
296*2aa14b1aSNick Terrell                     ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
297*2aa14b1aSNick Terrell                     ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
298*2aa14b1aSNick Terrell                 }
299*2aa14b1aSNick Terrell                 else
300*2aa14b1aSNick Terrell                 {
301*2aa14b1aSNick Terrell                     ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
302*2aa14b1aSNick Terrell                 }
303e0c1b49fSNick Terrell                 dctx->litPtr = dctx->litBuffer;
304e0c1b49fSNick Terrell                 dctx->litSize = litSize;
305e0c1b49fSNick Terrell                 return lhSize+1;
306e0c1b49fSNick Terrell             }
307e0c1b49fSNick Terrell         default:
308e0c1b49fSNick Terrell             RETURN_ERROR(corruption_detected, "impossible");
309e0c1b49fSNick Terrell         }
310e0c1b49fSNick Terrell     }
311e0c1b49fSNick Terrell }
312e0c1b49fSNick Terrell 
313e0c1b49fSNick Terrell /* Default FSE distribution tables.
314e0c1b49fSNick Terrell  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
315e0c1b49fSNick Terrell  * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
316e0c1b49fSNick Terrell  * They were generated programmatically with following method :
317e0c1b49fSNick Terrell  * - start from default distributions, present in /lib/common/zstd_internal.h
318e0c1b49fSNick Terrell  * - generate tables normally, using ZSTD_buildFSETable()
319e0c1b49fSNick Terrell  * - printout the content of tables
320e0c1b49fSNick Terrell  * - pretify output, report below, test with fuzzer to ensure it's correct */
321e0c1b49fSNick Terrell 
322e0c1b49fSNick Terrell /* Default FSE distribution table for Literal Lengths */
323e0c1b49fSNick Terrell static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
324e0c1b49fSNick Terrell      {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
325e0c1b49fSNick Terrell      /* nextState, nbAddBits, nbBits, baseVal */
326e0c1b49fSNick Terrell      {  0,  0,  4,    0},  { 16,  0,  4,    0},
327e0c1b49fSNick Terrell      { 32,  0,  5,    1},  {  0,  0,  5,    3},
328e0c1b49fSNick Terrell      {  0,  0,  5,    4},  {  0,  0,  5,    6},
329e0c1b49fSNick Terrell      {  0,  0,  5,    7},  {  0,  0,  5,    9},
330e0c1b49fSNick Terrell      {  0,  0,  5,   10},  {  0,  0,  5,   12},
331e0c1b49fSNick Terrell      {  0,  0,  6,   14},  {  0,  1,  5,   16},
332e0c1b49fSNick Terrell      {  0,  1,  5,   20},  {  0,  1,  5,   22},
333e0c1b49fSNick Terrell      {  0,  2,  5,   28},  {  0,  3,  5,   32},
334e0c1b49fSNick Terrell      {  0,  4,  5,   48},  { 32,  6,  5,   64},
335e0c1b49fSNick Terrell      {  0,  7,  5,  128},  {  0,  8,  6,  256},
336e0c1b49fSNick Terrell      {  0, 10,  6, 1024},  {  0, 12,  6, 4096},
337e0c1b49fSNick Terrell      { 32,  0,  4,    0},  {  0,  0,  4,    1},
338e0c1b49fSNick Terrell      {  0,  0,  5,    2},  { 32,  0,  5,    4},
339e0c1b49fSNick Terrell      {  0,  0,  5,    5},  { 32,  0,  5,    7},
340e0c1b49fSNick Terrell      {  0,  0,  5,    8},  { 32,  0,  5,   10},
341e0c1b49fSNick Terrell      {  0,  0,  5,   11},  {  0,  0,  6,   13},
342e0c1b49fSNick Terrell      { 32,  1,  5,   16},  {  0,  1,  5,   18},
343e0c1b49fSNick Terrell      { 32,  1,  5,   22},  {  0,  2,  5,   24},
344e0c1b49fSNick Terrell      { 32,  3,  5,   32},  {  0,  3,  5,   40},
345e0c1b49fSNick Terrell      {  0,  6,  4,   64},  { 16,  6,  4,   64},
346e0c1b49fSNick Terrell      { 32,  7,  5,  128},  {  0,  9,  6,  512},
347e0c1b49fSNick Terrell      {  0, 11,  6, 2048},  { 48,  0,  4,    0},
348e0c1b49fSNick Terrell      { 16,  0,  4,    1},  { 32,  0,  5,    2},
349e0c1b49fSNick Terrell      { 32,  0,  5,    3},  { 32,  0,  5,    5},
350e0c1b49fSNick Terrell      { 32,  0,  5,    6},  { 32,  0,  5,    8},
351e0c1b49fSNick Terrell      { 32,  0,  5,    9},  { 32,  0,  5,   11},
352e0c1b49fSNick Terrell      { 32,  0,  5,   12},  {  0,  0,  6,   15},
353e0c1b49fSNick Terrell      { 32,  1,  5,   18},  { 32,  1,  5,   20},
354e0c1b49fSNick Terrell      { 32,  2,  5,   24},  { 32,  2,  5,   28},
355e0c1b49fSNick Terrell      { 32,  3,  5,   40},  { 32,  4,  5,   48},
356e0c1b49fSNick Terrell      {  0, 16,  6,65536},  {  0, 15,  6,32768},
357e0c1b49fSNick Terrell      {  0, 14,  6,16384},  {  0, 13,  6, 8192},
358e0c1b49fSNick Terrell };   /* LL_defaultDTable */
359e0c1b49fSNick Terrell 
360e0c1b49fSNick Terrell /* Default FSE distribution table for Offset Codes */
361e0c1b49fSNick Terrell static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
362e0c1b49fSNick Terrell     {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
363e0c1b49fSNick Terrell     /* nextState, nbAddBits, nbBits, baseVal */
364e0c1b49fSNick Terrell     {  0,  0,  5,    0},     {  0,  6,  4,   61},
365e0c1b49fSNick Terrell     {  0,  9,  5,  509},     {  0, 15,  5,32765},
366e0c1b49fSNick Terrell     {  0, 21,  5,2097149},   {  0,  3,  5,    5},
367e0c1b49fSNick Terrell     {  0,  7,  4,  125},     {  0, 12,  5, 4093},
368e0c1b49fSNick Terrell     {  0, 18,  5,262141},    {  0, 23,  5,8388605},
369e0c1b49fSNick Terrell     {  0,  5,  5,   29},     {  0,  8,  4,  253},
370e0c1b49fSNick Terrell     {  0, 14,  5,16381},     {  0, 20,  5,1048573},
371e0c1b49fSNick Terrell     {  0,  2,  5,    1},     { 16,  7,  4,  125},
372e0c1b49fSNick Terrell     {  0, 11,  5, 2045},     {  0, 17,  5,131069},
373e0c1b49fSNick Terrell     {  0, 22,  5,4194301},   {  0,  4,  5,   13},
374e0c1b49fSNick Terrell     { 16,  8,  4,  253},     {  0, 13,  5, 8189},
375e0c1b49fSNick Terrell     {  0, 19,  5,524285},    {  0,  1,  5,    1},
376e0c1b49fSNick Terrell     { 16,  6,  4,   61},     {  0, 10,  5, 1021},
377e0c1b49fSNick Terrell     {  0, 16,  5,65533},     {  0, 28,  5,268435453},
378e0c1b49fSNick Terrell     {  0, 27,  5,134217725}, {  0, 26,  5,67108861},
379e0c1b49fSNick Terrell     {  0, 25,  5,33554429},  {  0, 24,  5,16777213},
380e0c1b49fSNick Terrell };   /* OF_defaultDTable */
381e0c1b49fSNick Terrell 
382e0c1b49fSNick Terrell 
383e0c1b49fSNick Terrell /* Default FSE distribution table for Match Lengths */
384e0c1b49fSNick Terrell static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
385e0c1b49fSNick Terrell     {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
386e0c1b49fSNick Terrell     /* nextState, nbAddBits, nbBits, baseVal */
387e0c1b49fSNick Terrell     {  0,  0,  6,    3},  {  0,  0,  4,    4},
388e0c1b49fSNick Terrell     { 32,  0,  5,    5},  {  0,  0,  5,    6},
389e0c1b49fSNick Terrell     {  0,  0,  5,    8},  {  0,  0,  5,    9},
390e0c1b49fSNick Terrell     {  0,  0,  5,   11},  {  0,  0,  6,   13},
391e0c1b49fSNick Terrell     {  0,  0,  6,   16},  {  0,  0,  6,   19},
392e0c1b49fSNick Terrell     {  0,  0,  6,   22},  {  0,  0,  6,   25},
393e0c1b49fSNick Terrell     {  0,  0,  6,   28},  {  0,  0,  6,   31},
394e0c1b49fSNick Terrell     {  0,  0,  6,   34},  {  0,  1,  6,   37},
395e0c1b49fSNick Terrell     {  0,  1,  6,   41},  {  0,  2,  6,   47},
396e0c1b49fSNick Terrell     {  0,  3,  6,   59},  {  0,  4,  6,   83},
397e0c1b49fSNick Terrell     {  0,  7,  6,  131},  {  0,  9,  6,  515},
398e0c1b49fSNick Terrell     { 16,  0,  4,    4},  {  0,  0,  4,    5},
399e0c1b49fSNick Terrell     { 32,  0,  5,    6},  {  0,  0,  5,    7},
400e0c1b49fSNick Terrell     { 32,  0,  5,    9},  {  0,  0,  5,   10},
401e0c1b49fSNick Terrell     {  0,  0,  6,   12},  {  0,  0,  6,   15},
402e0c1b49fSNick Terrell     {  0,  0,  6,   18},  {  0,  0,  6,   21},
403e0c1b49fSNick Terrell     {  0,  0,  6,   24},  {  0,  0,  6,   27},
404e0c1b49fSNick Terrell     {  0,  0,  6,   30},  {  0,  0,  6,   33},
405e0c1b49fSNick Terrell     {  0,  1,  6,   35},  {  0,  1,  6,   39},
406e0c1b49fSNick Terrell     {  0,  2,  6,   43},  {  0,  3,  6,   51},
407e0c1b49fSNick Terrell     {  0,  4,  6,   67},  {  0,  5,  6,   99},
408e0c1b49fSNick Terrell     {  0,  8,  6,  259},  { 32,  0,  4,    4},
409e0c1b49fSNick Terrell     { 48,  0,  4,    4},  { 16,  0,  4,    5},
410e0c1b49fSNick Terrell     { 32,  0,  5,    7},  { 32,  0,  5,    8},
411e0c1b49fSNick Terrell     { 32,  0,  5,   10},  { 32,  0,  5,   11},
412e0c1b49fSNick Terrell     {  0,  0,  6,   14},  {  0,  0,  6,   17},
413e0c1b49fSNick Terrell     {  0,  0,  6,   20},  {  0,  0,  6,   23},
414e0c1b49fSNick Terrell     {  0,  0,  6,   26},  {  0,  0,  6,   29},
415e0c1b49fSNick Terrell     {  0,  0,  6,   32},  {  0, 16,  6,65539},
416e0c1b49fSNick Terrell     {  0, 15,  6,32771},  {  0, 14,  6,16387},
417e0c1b49fSNick Terrell     {  0, 13,  6, 8195},  {  0, 12,  6, 4099},
418e0c1b49fSNick Terrell     {  0, 11,  6, 2051},  {  0, 10,  6, 1027},
419e0c1b49fSNick Terrell };   /* ML_defaultDTable */
420e0c1b49fSNick Terrell 
421e0c1b49fSNick Terrell 
ZSTD_buildSeqTable_rle(ZSTD_seqSymbol * dt,U32 baseValue,U8 nbAddBits)422*2aa14b1aSNick Terrell static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
423e0c1b49fSNick Terrell {
424e0c1b49fSNick Terrell     void* ptr = dt;
425e0c1b49fSNick Terrell     ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
426e0c1b49fSNick Terrell     ZSTD_seqSymbol* const cell = dt + 1;
427e0c1b49fSNick Terrell 
428e0c1b49fSNick Terrell     DTableH->tableLog = 0;
429e0c1b49fSNick Terrell     DTableH->fastMode = 0;
430e0c1b49fSNick Terrell 
431e0c1b49fSNick Terrell     cell->nbBits = 0;
432e0c1b49fSNick Terrell     cell->nextState = 0;
433e0c1b49fSNick Terrell     assert(nbAddBits < 255);
434*2aa14b1aSNick Terrell     cell->nbAdditionalBits = nbAddBits;
435e0c1b49fSNick Terrell     cell->baseValue = baseValue;
436e0c1b49fSNick Terrell }
437e0c1b49fSNick Terrell 
438e0c1b49fSNick Terrell 
439e0c1b49fSNick Terrell /* ZSTD_buildFSETable() :
440e0c1b49fSNick Terrell  * generate FSE decoding table for one symbol (ll, ml or off)
441e0c1b49fSNick Terrell  * cannot fail if input is valid =>
442e0c1b49fSNick Terrell  * all inputs are presumed validated at this stage */
443e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE
ZSTD_buildFSETable_body(ZSTD_seqSymbol * dt,const short * normalizedCounter,unsigned maxSymbolValue,const U32 * baseValue,const U8 * nbAdditionalBits,unsigned tableLog,void * wksp,size_t wkspSize)444e0c1b49fSNick Terrell void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
445e0c1b49fSNick Terrell             const short* normalizedCounter, unsigned maxSymbolValue,
446*2aa14b1aSNick Terrell             const U32* baseValue, const U8* nbAdditionalBits,
447e0c1b49fSNick Terrell             unsigned tableLog, void* wksp, size_t wkspSize)
448e0c1b49fSNick Terrell {
449e0c1b49fSNick Terrell     ZSTD_seqSymbol* const tableDecode = dt+1;
450e0c1b49fSNick Terrell     U32 const maxSV1 = maxSymbolValue + 1;
451e0c1b49fSNick Terrell     U32 const tableSize = 1 << tableLog;
452e0c1b49fSNick Terrell 
453e0c1b49fSNick Terrell     U16* symbolNext = (U16*)wksp;
454e0c1b49fSNick Terrell     BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
455e0c1b49fSNick Terrell     U32 highThreshold = tableSize - 1;
456e0c1b49fSNick Terrell 
457e0c1b49fSNick Terrell 
458e0c1b49fSNick Terrell     /* Sanity Checks */
459e0c1b49fSNick Terrell     assert(maxSymbolValue <= MaxSeq);
460e0c1b49fSNick Terrell     assert(tableLog <= MaxFSELog);
461e0c1b49fSNick Terrell     assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
462e0c1b49fSNick Terrell     (void)wkspSize;
463e0c1b49fSNick Terrell     /* Init, lay down lowprob symbols */
464e0c1b49fSNick Terrell     {   ZSTD_seqSymbol_header DTableH;
465e0c1b49fSNick Terrell         DTableH.tableLog = tableLog;
466e0c1b49fSNick Terrell         DTableH.fastMode = 1;
467e0c1b49fSNick Terrell         {   S16 const largeLimit= (S16)(1 << (tableLog-1));
468e0c1b49fSNick Terrell             U32 s;
469e0c1b49fSNick Terrell             for (s=0; s<maxSV1; s++) {
470e0c1b49fSNick Terrell                 if (normalizedCounter[s]==-1) {
471e0c1b49fSNick Terrell                     tableDecode[highThreshold--].baseValue = s;
472e0c1b49fSNick Terrell                     symbolNext[s] = 1;
473e0c1b49fSNick Terrell                 } else {
474e0c1b49fSNick Terrell                     if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
475e0c1b49fSNick Terrell                     assert(normalizedCounter[s]>=0);
476e0c1b49fSNick Terrell                     symbolNext[s] = (U16)normalizedCounter[s];
477e0c1b49fSNick Terrell         }   }   }
478e0c1b49fSNick Terrell         ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
479e0c1b49fSNick Terrell     }
480e0c1b49fSNick Terrell 
481e0c1b49fSNick Terrell     /* Spread symbols */
482e0c1b49fSNick Terrell     assert(tableSize <= 512);
483e0c1b49fSNick Terrell     /* Specialized symbol spreading for the case when there are
484e0c1b49fSNick Terrell      * no low probability (-1 count) symbols. When compressing
485e0c1b49fSNick Terrell      * small blocks we avoid low probability symbols to hit this
486e0c1b49fSNick Terrell      * case, since header decoding speed matters more.
487e0c1b49fSNick Terrell      */
488e0c1b49fSNick Terrell     if (highThreshold == tableSize - 1) {
489e0c1b49fSNick Terrell         size_t const tableMask = tableSize-1;
490e0c1b49fSNick Terrell         size_t const step = FSE_TABLESTEP(tableSize);
491e0c1b49fSNick Terrell         /* First lay down the symbols in order.
492e0c1b49fSNick Terrell          * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
493e0c1b49fSNick Terrell          * misses since small blocks generally have small table logs, so nearly
494e0c1b49fSNick Terrell          * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
495e0c1b49fSNick Terrell          * our buffer to handle the over-write.
496e0c1b49fSNick Terrell          */
497e0c1b49fSNick Terrell         {
498e0c1b49fSNick Terrell             U64 const add = 0x0101010101010101ull;
499e0c1b49fSNick Terrell             size_t pos = 0;
500e0c1b49fSNick Terrell             U64 sv = 0;
501e0c1b49fSNick Terrell             U32 s;
502e0c1b49fSNick Terrell             for (s=0; s<maxSV1; ++s, sv += add) {
503e0c1b49fSNick Terrell                 int i;
504e0c1b49fSNick Terrell                 int const n = normalizedCounter[s];
505e0c1b49fSNick Terrell                 MEM_write64(spread + pos, sv);
506e0c1b49fSNick Terrell                 for (i = 8; i < n; i += 8) {
507e0c1b49fSNick Terrell                     MEM_write64(spread + pos + i, sv);
508e0c1b49fSNick Terrell                 }
509e0c1b49fSNick Terrell                 pos += n;
510e0c1b49fSNick Terrell             }
511e0c1b49fSNick Terrell         }
512e0c1b49fSNick Terrell         /* Now we spread those positions across the table.
513e0c1b49fSNick Terrell          * The benefit of doing it in two stages is that we avoid the the
514e0c1b49fSNick Terrell          * variable size inner loop, which caused lots of branch misses.
515e0c1b49fSNick Terrell          * Now we can run through all the positions without any branch misses.
516e0c1b49fSNick Terrell          * We unroll the loop twice, since that is what emperically worked best.
517e0c1b49fSNick Terrell          */
518e0c1b49fSNick Terrell         {
519e0c1b49fSNick Terrell             size_t position = 0;
520e0c1b49fSNick Terrell             size_t s;
521e0c1b49fSNick Terrell             size_t const unroll = 2;
522e0c1b49fSNick Terrell             assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
523e0c1b49fSNick Terrell             for (s = 0; s < (size_t)tableSize; s += unroll) {
524e0c1b49fSNick Terrell                 size_t u;
525e0c1b49fSNick Terrell                 for (u = 0; u < unroll; ++u) {
526e0c1b49fSNick Terrell                     size_t const uPosition = (position + (u * step)) & tableMask;
527e0c1b49fSNick Terrell                     tableDecode[uPosition].baseValue = spread[s + u];
528e0c1b49fSNick Terrell                 }
529e0c1b49fSNick Terrell                 position = (position + (unroll * step)) & tableMask;
530e0c1b49fSNick Terrell             }
531e0c1b49fSNick Terrell             assert(position == 0);
532e0c1b49fSNick Terrell         }
533e0c1b49fSNick Terrell     } else {
534e0c1b49fSNick Terrell         U32 const tableMask = tableSize-1;
535e0c1b49fSNick Terrell         U32 const step = FSE_TABLESTEP(tableSize);
536e0c1b49fSNick Terrell         U32 s, position = 0;
537e0c1b49fSNick Terrell         for (s=0; s<maxSV1; s++) {
538e0c1b49fSNick Terrell             int i;
539e0c1b49fSNick Terrell             int const n = normalizedCounter[s];
540e0c1b49fSNick Terrell             for (i=0; i<n; i++) {
541e0c1b49fSNick Terrell                 tableDecode[position].baseValue = s;
542e0c1b49fSNick Terrell                 position = (position + step) & tableMask;
543e0c1b49fSNick Terrell                 while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
544e0c1b49fSNick Terrell         }   }
545e0c1b49fSNick Terrell         assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
546e0c1b49fSNick Terrell     }
547e0c1b49fSNick Terrell 
548e0c1b49fSNick Terrell     /* Build Decoding table */
549e0c1b49fSNick Terrell     {
550e0c1b49fSNick Terrell         U32 u;
551e0c1b49fSNick Terrell         for (u=0; u<tableSize; u++) {
552e0c1b49fSNick Terrell             U32 const symbol = tableDecode[u].baseValue;
553e0c1b49fSNick Terrell             U32 const nextState = symbolNext[symbol]++;
554e0c1b49fSNick Terrell             tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
555e0c1b49fSNick Terrell             tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
556e0c1b49fSNick Terrell             assert(nbAdditionalBits[symbol] < 255);
557*2aa14b1aSNick Terrell             tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
558e0c1b49fSNick Terrell             tableDecode[u].baseValue = baseValue[symbol];
559e0c1b49fSNick Terrell         }
560e0c1b49fSNick Terrell     }
561e0c1b49fSNick Terrell }
562e0c1b49fSNick Terrell 
563e0c1b49fSNick Terrell /* Avoids the FORCE_INLINE of the _body() function. */
ZSTD_buildFSETable_body_default(ZSTD_seqSymbol * dt,const short * normalizedCounter,unsigned maxSymbolValue,const U32 * baseValue,const U8 * nbAdditionalBits,unsigned tableLog,void * wksp,size_t wkspSize)564e0c1b49fSNick Terrell static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
565e0c1b49fSNick Terrell             const short* normalizedCounter, unsigned maxSymbolValue,
566*2aa14b1aSNick Terrell             const U32* baseValue, const U8* nbAdditionalBits,
567e0c1b49fSNick Terrell             unsigned tableLog, void* wksp, size_t wkspSize)
568e0c1b49fSNick Terrell {
569e0c1b49fSNick Terrell     ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
570e0c1b49fSNick Terrell             baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
571e0c1b49fSNick Terrell }
572e0c1b49fSNick Terrell 
573e0c1b49fSNick Terrell #if DYNAMIC_BMI2
ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol * dt,const short * normalizedCounter,unsigned maxSymbolValue,const U32 * baseValue,const U8 * nbAdditionalBits,unsigned tableLog,void * wksp,size_t wkspSize)574*2aa14b1aSNick Terrell BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
575e0c1b49fSNick Terrell             const short* normalizedCounter, unsigned maxSymbolValue,
576*2aa14b1aSNick Terrell             const U32* baseValue, const U8* nbAdditionalBits,
577e0c1b49fSNick Terrell             unsigned tableLog, void* wksp, size_t wkspSize)
578e0c1b49fSNick Terrell {
579e0c1b49fSNick Terrell     ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
580e0c1b49fSNick Terrell             baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
581e0c1b49fSNick Terrell }
582e0c1b49fSNick Terrell #endif
583e0c1b49fSNick Terrell 
ZSTD_buildFSETable(ZSTD_seqSymbol * dt,const short * normalizedCounter,unsigned maxSymbolValue,const U32 * baseValue,const U8 * nbAdditionalBits,unsigned tableLog,void * wksp,size_t wkspSize,int bmi2)584e0c1b49fSNick Terrell void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
585e0c1b49fSNick Terrell             const short* normalizedCounter, unsigned maxSymbolValue,
586*2aa14b1aSNick Terrell             const U32* baseValue, const U8* nbAdditionalBits,
587e0c1b49fSNick Terrell             unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
588e0c1b49fSNick Terrell {
589e0c1b49fSNick Terrell #if DYNAMIC_BMI2
590e0c1b49fSNick Terrell     if (bmi2) {
591e0c1b49fSNick Terrell         ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
592e0c1b49fSNick Terrell                 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
593e0c1b49fSNick Terrell         return;
594e0c1b49fSNick Terrell     }
595e0c1b49fSNick Terrell #endif
596e0c1b49fSNick Terrell     (void)bmi2;
597e0c1b49fSNick Terrell     ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
598e0c1b49fSNick Terrell             baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
599e0c1b49fSNick Terrell }
600e0c1b49fSNick Terrell 
601e0c1b49fSNick Terrell 
602e0c1b49fSNick Terrell /*! ZSTD_buildSeqTable() :
603e0c1b49fSNick Terrell  * @return : nb bytes read from src,
604e0c1b49fSNick Terrell  *           or an error code if it fails */
ZSTD_buildSeqTable(ZSTD_seqSymbol * DTableSpace,const ZSTD_seqSymbol ** DTablePtr,symbolEncodingType_e type,unsigned max,U32 maxLog,const void * src,size_t srcSize,const U32 * baseValue,const U8 * nbAdditionalBits,const ZSTD_seqSymbol * defaultTable,U32 flagRepeatTable,int ddictIsCold,int nbSeq,U32 * wksp,size_t wkspSize,int bmi2)605e0c1b49fSNick Terrell static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
606e0c1b49fSNick Terrell                                  symbolEncodingType_e type, unsigned max, U32 maxLog,
607e0c1b49fSNick Terrell                                  const void* src, size_t srcSize,
608*2aa14b1aSNick Terrell                                  const U32* baseValue, const U8* nbAdditionalBits,
609e0c1b49fSNick Terrell                                  const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
610e0c1b49fSNick Terrell                                  int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
611e0c1b49fSNick Terrell                                  int bmi2)
612e0c1b49fSNick Terrell {
613e0c1b49fSNick Terrell     switch(type)
614e0c1b49fSNick Terrell     {
615e0c1b49fSNick Terrell     case set_rle :
616e0c1b49fSNick Terrell         RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
617e0c1b49fSNick Terrell         RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
618e0c1b49fSNick Terrell         {   U32 const symbol = *(const BYTE*)src;
619e0c1b49fSNick Terrell             U32 const baseline = baseValue[symbol];
620*2aa14b1aSNick Terrell             U8 const nbBits = nbAdditionalBits[symbol];
621e0c1b49fSNick Terrell             ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
622e0c1b49fSNick Terrell         }
623e0c1b49fSNick Terrell         *DTablePtr = DTableSpace;
624e0c1b49fSNick Terrell         return 1;
625e0c1b49fSNick Terrell     case set_basic :
626e0c1b49fSNick Terrell         *DTablePtr = defaultTable;
627e0c1b49fSNick Terrell         return 0;
628e0c1b49fSNick Terrell     case set_repeat:
629e0c1b49fSNick Terrell         RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
630e0c1b49fSNick Terrell         /* prefetch FSE table if used */
631e0c1b49fSNick Terrell         if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
632e0c1b49fSNick Terrell             const void* const pStart = *DTablePtr;
633e0c1b49fSNick Terrell             size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
634e0c1b49fSNick Terrell             PREFETCH_AREA(pStart, pSize);
635e0c1b49fSNick Terrell         }
636e0c1b49fSNick Terrell         return 0;
637e0c1b49fSNick Terrell     case set_compressed :
638e0c1b49fSNick Terrell         {   unsigned tableLog;
639e0c1b49fSNick Terrell             S16 norm[MaxSeq+1];
640e0c1b49fSNick Terrell             size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
641e0c1b49fSNick Terrell             RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
642e0c1b49fSNick Terrell             RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
643e0c1b49fSNick Terrell             ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
644e0c1b49fSNick Terrell             *DTablePtr = DTableSpace;
645e0c1b49fSNick Terrell             return headerSize;
646e0c1b49fSNick Terrell         }
647e0c1b49fSNick Terrell     default :
648e0c1b49fSNick Terrell         assert(0);
649e0c1b49fSNick Terrell         RETURN_ERROR(GENERIC, "impossible");
650e0c1b49fSNick Terrell     }
651e0c1b49fSNick Terrell }
652e0c1b49fSNick Terrell 
ZSTD_decodeSeqHeaders(ZSTD_DCtx * dctx,int * nbSeqPtr,const void * src,size_t srcSize)653e0c1b49fSNick Terrell size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
654e0c1b49fSNick Terrell                              const void* src, size_t srcSize)
655e0c1b49fSNick Terrell {
656e0c1b49fSNick Terrell     const BYTE* const istart = (const BYTE*)src;
657e0c1b49fSNick Terrell     const BYTE* const iend = istart + srcSize;
658e0c1b49fSNick Terrell     const BYTE* ip = istart;
659e0c1b49fSNick Terrell     int nbSeq;
660e0c1b49fSNick Terrell     DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
661e0c1b49fSNick Terrell 
662e0c1b49fSNick Terrell     /* check */
663e0c1b49fSNick Terrell     RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
664e0c1b49fSNick Terrell 
665e0c1b49fSNick Terrell     /* SeqHead */
666e0c1b49fSNick Terrell     nbSeq = *ip++;
667e0c1b49fSNick Terrell     if (!nbSeq) {
668e0c1b49fSNick Terrell         *nbSeqPtr=0;
669e0c1b49fSNick Terrell         RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
670e0c1b49fSNick Terrell         return 1;
671e0c1b49fSNick Terrell     }
672e0c1b49fSNick Terrell     if (nbSeq > 0x7F) {
673e0c1b49fSNick Terrell         if (nbSeq == 0xFF) {
674e0c1b49fSNick Terrell             RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
675e0c1b49fSNick Terrell             nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
676e0c1b49fSNick Terrell             ip+=2;
677e0c1b49fSNick Terrell         } else {
678e0c1b49fSNick Terrell             RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
679e0c1b49fSNick Terrell             nbSeq = ((nbSeq-0x80)<<8) + *ip++;
680e0c1b49fSNick Terrell         }
681e0c1b49fSNick Terrell     }
682e0c1b49fSNick Terrell     *nbSeqPtr = nbSeq;
683e0c1b49fSNick Terrell 
684e0c1b49fSNick Terrell     /* FSE table descriptors */
685e0c1b49fSNick Terrell     RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
686e0c1b49fSNick Terrell     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
687e0c1b49fSNick Terrell         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
688e0c1b49fSNick Terrell         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
689e0c1b49fSNick Terrell         ip++;
690e0c1b49fSNick Terrell 
691e0c1b49fSNick Terrell         /* Build DTables */
692e0c1b49fSNick Terrell         {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
693e0c1b49fSNick Terrell                                                       LLtype, MaxLL, LLFSELog,
694e0c1b49fSNick Terrell                                                       ip, iend-ip,
695e0c1b49fSNick Terrell                                                       LL_base, LL_bits,
696e0c1b49fSNick Terrell                                                       LL_defaultDTable, dctx->fseEntropy,
697e0c1b49fSNick Terrell                                                       dctx->ddictIsCold, nbSeq,
698e0c1b49fSNick Terrell                                                       dctx->workspace, sizeof(dctx->workspace),
699*2aa14b1aSNick Terrell                                                       ZSTD_DCtx_get_bmi2(dctx));
700e0c1b49fSNick Terrell             RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
701e0c1b49fSNick Terrell             ip += llhSize;
702e0c1b49fSNick Terrell         }
703e0c1b49fSNick Terrell 
704e0c1b49fSNick Terrell         {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
705e0c1b49fSNick Terrell                                                       OFtype, MaxOff, OffFSELog,
706e0c1b49fSNick Terrell                                                       ip, iend-ip,
707e0c1b49fSNick Terrell                                                       OF_base, OF_bits,
708e0c1b49fSNick Terrell                                                       OF_defaultDTable, dctx->fseEntropy,
709e0c1b49fSNick Terrell                                                       dctx->ddictIsCold, nbSeq,
710e0c1b49fSNick Terrell                                                       dctx->workspace, sizeof(dctx->workspace),
711*2aa14b1aSNick Terrell                                                       ZSTD_DCtx_get_bmi2(dctx));
712e0c1b49fSNick Terrell             RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
713e0c1b49fSNick Terrell             ip += ofhSize;
714e0c1b49fSNick Terrell         }
715e0c1b49fSNick Terrell 
716e0c1b49fSNick Terrell         {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
717e0c1b49fSNick Terrell                                                       MLtype, MaxML, MLFSELog,
718e0c1b49fSNick Terrell                                                       ip, iend-ip,
719e0c1b49fSNick Terrell                                                       ML_base, ML_bits,
720e0c1b49fSNick Terrell                                                       ML_defaultDTable, dctx->fseEntropy,
721e0c1b49fSNick Terrell                                                       dctx->ddictIsCold, nbSeq,
722e0c1b49fSNick Terrell                                                       dctx->workspace, sizeof(dctx->workspace),
723*2aa14b1aSNick Terrell                                                       ZSTD_DCtx_get_bmi2(dctx));
724e0c1b49fSNick Terrell             RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
725e0c1b49fSNick Terrell             ip += mlhSize;
726e0c1b49fSNick Terrell         }
727e0c1b49fSNick Terrell     }
728e0c1b49fSNick Terrell 
729e0c1b49fSNick Terrell     return ip-istart;
730e0c1b49fSNick Terrell }
731e0c1b49fSNick Terrell 
732e0c1b49fSNick Terrell 
733e0c1b49fSNick Terrell typedef struct {
734e0c1b49fSNick Terrell     size_t litLength;
735e0c1b49fSNick Terrell     size_t matchLength;
736e0c1b49fSNick Terrell     size_t offset;
737e0c1b49fSNick Terrell } seq_t;
738e0c1b49fSNick Terrell 
739e0c1b49fSNick Terrell typedef struct {
740e0c1b49fSNick Terrell     size_t state;
741e0c1b49fSNick Terrell     const ZSTD_seqSymbol* table;
742e0c1b49fSNick Terrell } ZSTD_fseState;
743e0c1b49fSNick Terrell 
744e0c1b49fSNick Terrell typedef struct {
745e0c1b49fSNick Terrell     BIT_DStream_t DStream;
746e0c1b49fSNick Terrell     ZSTD_fseState stateLL;
747e0c1b49fSNick Terrell     ZSTD_fseState stateOffb;
748e0c1b49fSNick Terrell     ZSTD_fseState stateML;
749e0c1b49fSNick Terrell     size_t prevOffset[ZSTD_REP_NUM];
750e0c1b49fSNick Terrell } seqState_t;
751e0c1b49fSNick Terrell 
752e0c1b49fSNick Terrell /*! ZSTD_overlapCopy8() :
753e0c1b49fSNick Terrell  *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
754e0c1b49fSNick Terrell  *  If the offset is < 8 then the offset is spread to at least 8 bytes.
755e0c1b49fSNick Terrell  *
756e0c1b49fSNick Terrell  *  Precondition: *ip <= *op
757e0c1b49fSNick Terrell  *  Postcondition: *op - *op >= 8
758e0c1b49fSNick Terrell  */
ZSTD_overlapCopy8(BYTE ** op,BYTE const ** ip,size_t offset)759e0c1b49fSNick Terrell HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
760e0c1b49fSNick Terrell     assert(*ip <= *op);
761e0c1b49fSNick Terrell     if (offset < 8) {
762e0c1b49fSNick Terrell         /* close range match, overlap */
763e0c1b49fSNick Terrell         static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
764e0c1b49fSNick Terrell         static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
765e0c1b49fSNick Terrell         int const sub2 = dec64table[offset];
766e0c1b49fSNick Terrell         (*op)[0] = (*ip)[0];
767e0c1b49fSNick Terrell         (*op)[1] = (*ip)[1];
768e0c1b49fSNick Terrell         (*op)[2] = (*ip)[2];
769e0c1b49fSNick Terrell         (*op)[3] = (*ip)[3];
770e0c1b49fSNick Terrell         *ip += dec32table[offset];
771e0c1b49fSNick Terrell         ZSTD_copy4(*op+4, *ip);
772e0c1b49fSNick Terrell         *ip -= sub2;
773e0c1b49fSNick Terrell     } else {
774e0c1b49fSNick Terrell         ZSTD_copy8(*op, *ip);
775e0c1b49fSNick Terrell     }
776e0c1b49fSNick Terrell     *ip += 8;
777e0c1b49fSNick Terrell     *op += 8;
778e0c1b49fSNick Terrell     assert(*op - *ip >= 8);
779e0c1b49fSNick Terrell }
780e0c1b49fSNick Terrell 
781e0c1b49fSNick Terrell /*! ZSTD_safecopy() :
782e0c1b49fSNick Terrell  *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
783e0c1b49fSNick Terrell  *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
784e0c1b49fSNick Terrell  *  This function is only called in the uncommon case where the sequence is near the end of the block. It
785e0c1b49fSNick Terrell  *  should be fast for a single long sequence, but can be slow for several short sequences.
786e0c1b49fSNick Terrell  *
787e0c1b49fSNick Terrell  *  @param ovtype controls the overlap detection
788e0c1b49fSNick Terrell  *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
789e0c1b49fSNick Terrell  *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
790e0c1b49fSNick Terrell  *           The src buffer must be before the dst buffer.
791e0c1b49fSNick Terrell  */
ZSTD_safecopy(BYTE * op,const BYTE * const oend_w,BYTE const * ip,ptrdiff_t length,ZSTD_overlap_e ovtype)792*2aa14b1aSNick Terrell static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
793e0c1b49fSNick Terrell     ptrdiff_t const diff = op - ip;
794e0c1b49fSNick Terrell     BYTE* const oend = op + length;
795e0c1b49fSNick Terrell 
796e0c1b49fSNick Terrell     assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
797e0c1b49fSNick Terrell            (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
798e0c1b49fSNick Terrell 
799e0c1b49fSNick Terrell     if (length < 8) {
800e0c1b49fSNick Terrell         /* Handle short lengths. */
801e0c1b49fSNick Terrell         while (op < oend) *op++ = *ip++;
802e0c1b49fSNick Terrell         return;
803e0c1b49fSNick Terrell     }
804e0c1b49fSNick Terrell     if (ovtype == ZSTD_overlap_src_before_dst) {
805e0c1b49fSNick Terrell         /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
806e0c1b49fSNick Terrell         assert(length >= 8);
807e0c1b49fSNick Terrell         ZSTD_overlapCopy8(&op, &ip, diff);
808*2aa14b1aSNick Terrell         length -= 8;
809e0c1b49fSNick Terrell         assert(op - ip >= 8);
810e0c1b49fSNick Terrell         assert(op <= oend);
811e0c1b49fSNick Terrell     }
812e0c1b49fSNick Terrell 
813e0c1b49fSNick Terrell     if (oend <= oend_w) {
814e0c1b49fSNick Terrell         /* No risk of overwrite. */
815e0c1b49fSNick Terrell         ZSTD_wildcopy(op, ip, length, ovtype);
816e0c1b49fSNick Terrell         return;
817e0c1b49fSNick Terrell     }
818e0c1b49fSNick Terrell     if (op <= oend_w) {
819e0c1b49fSNick Terrell         /* Wildcopy until we get close to the end. */
820e0c1b49fSNick Terrell         assert(oend > oend_w);
821e0c1b49fSNick Terrell         ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
822e0c1b49fSNick Terrell         ip += oend_w - op;
823*2aa14b1aSNick Terrell         op += oend_w - op;
824e0c1b49fSNick Terrell     }
825e0c1b49fSNick Terrell     /* Handle the leftovers. */
826e0c1b49fSNick Terrell     while (op < oend) *op++ = *ip++;
827e0c1b49fSNick Terrell }
828e0c1b49fSNick Terrell 
829*2aa14b1aSNick Terrell /* ZSTD_safecopyDstBeforeSrc():
830*2aa14b1aSNick Terrell  * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
831*2aa14b1aSNick Terrell  * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
ZSTD_safecopyDstBeforeSrc(BYTE * op,BYTE const * ip,ptrdiff_t length)832*2aa14b1aSNick Terrell static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
833*2aa14b1aSNick Terrell     ptrdiff_t const diff = op - ip;
834*2aa14b1aSNick Terrell     BYTE* const oend = op + length;
835*2aa14b1aSNick Terrell 
836*2aa14b1aSNick Terrell     if (length < 8 || diff > -8) {
837*2aa14b1aSNick Terrell         /* Handle short lengths, close overlaps, and dst not before src. */
838*2aa14b1aSNick Terrell         while (op < oend) *op++ = *ip++;
839*2aa14b1aSNick Terrell         return;
840*2aa14b1aSNick Terrell     }
841*2aa14b1aSNick Terrell 
842*2aa14b1aSNick Terrell     if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
843*2aa14b1aSNick Terrell         ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
844*2aa14b1aSNick Terrell         ip += oend - WILDCOPY_OVERLENGTH - op;
845*2aa14b1aSNick Terrell         op += oend - WILDCOPY_OVERLENGTH - op;
846*2aa14b1aSNick Terrell     }
847*2aa14b1aSNick Terrell 
848*2aa14b1aSNick Terrell     /* Handle the leftovers. */
849*2aa14b1aSNick Terrell     while (op < oend) *op++ = *ip++;
850*2aa14b1aSNick Terrell }
851*2aa14b1aSNick Terrell 
852e0c1b49fSNick Terrell /* ZSTD_execSequenceEnd():
853e0c1b49fSNick Terrell  * This version handles cases that are near the end of the output buffer. It requires
854e0c1b49fSNick Terrell  * more careful checks to make sure there is no overflow. By separating out these hard
855e0c1b49fSNick Terrell  * and unlikely cases, we can speed up the common cases.
856e0c1b49fSNick Terrell  *
857e0c1b49fSNick Terrell  * NOTE: This function needs to be fast for a single long sequence, but doesn't need
858e0c1b49fSNick Terrell  * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
859e0c1b49fSNick Terrell  */
860e0c1b49fSNick Terrell FORCE_NOINLINE
ZSTD_execSequenceEnd(BYTE * op,BYTE * const oend,seq_t sequence,const BYTE ** litPtr,const BYTE * const litLimit,const BYTE * const prefixStart,const BYTE * const virtualStart,const BYTE * const dictEnd)861e0c1b49fSNick Terrell size_t ZSTD_execSequenceEnd(BYTE* op,
862e0c1b49fSNick Terrell     BYTE* const oend, seq_t sequence,
863e0c1b49fSNick Terrell     const BYTE** litPtr, const BYTE* const litLimit,
864e0c1b49fSNick Terrell     const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
865e0c1b49fSNick Terrell {
866e0c1b49fSNick Terrell     BYTE* const oLitEnd = op + sequence.litLength;
867e0c1b49fSNick Terrell     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
868e0c1b49fSNick Terrell     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
869e0c1b49fSNick Terrell     const BYTE* match = oLitEnd - sequence.offset;
870e0c1b49fSNick Terrell     BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
871e0c1b49fSNick Terrell 
872e0c1b49fSNick Terrell     /* bounds checks : careful of address space overflow in 32-bit mode */
873e0c1b49fSNick Terrell     RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
874e0c1b49fSNick Terrell     RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
875e0c1b49fSNick Terrell     assert(op < op + sequenceLength);
876e0c1b49fSNick Terrell     assert(oLitEnd < op + sequenceLength);
877e0c1b49fSNick Terrell 
878e0c1b49fSNick Terrell     /* copy literals */
879e0c1b49fSNick Terrell     ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
880e0c1b49fSNick Terrell     op = oLitEnd;
881e0c1b49fSNick Terrell     *litPtr = iLitEnd;
882e0c1b49fSNick Terrell 
883e0c1b49fSNick Terrell     /* copy Match */
884e0c1b49fSNick Terrell     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
885e0c1b49fSNick Terrell         /* offset beyond prefix */
886e0c1b49fSNick Terrell         RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
887e0c1b49fSNick Terrell         match = dictEnd - (prefixStart - match);
888e0c1b49fSNick Terrell         if (match + sequence.matchLength <= dictEnd) {
889e0c1b49fSNick Terrell             ZSTD_memmove(oLitEnd, match, sequence.matchLength);
890e0c1b49fSNick Terrell             return sequenceLength;
891e0c1b49fSNick Terrell         }
892e0c1b49fSNick Terrell         /* span extDict & currentPrefixSegment */
893e0c1b49fSNick Terrell         {   size_t const length1 = dictEnd - match;
894e0c1b49fSNick Terrell         ZSTD_memmove(oLitEnd, match, length1);
895e0c1b49fSNick Terrell         op = oLitEnd + length1;
896e0c1b49fSNick Terrell         sequence.matchLength -= length1;
897e0c1b49fSNick Terrell         match = prefixStart;
898*2aa14b1aSNick Terrell         }
899*2aa14b1aSNick Terrell     }
900*2aa14b1aSNick Terrell     ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
901*2aa14b1aSNick Terrell     return sequenceLength;
902*2aa14b1aSNick Terrell }
903*2aa14b1aSNick Terrell 
904*2aa14b1aSNick Terrell /* ZSTD_execSequenceEndSplitLitBuffer():
905*2aa14b1aSNick Terrell  * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
906*2aa14b1aSNick Terrell  */
907*2aa14b1aSNick Terrell FORCE_NOINLINE
ZSTD_execSequenceEndSplitLitBuffer(BYTE * op,BYTE * const oend,const BYTE * const oend_w,seq_t sequence,const BYTE ** litPtr,const BYTE * const litLimit,const BYTE * const prefixStart,const BYTE * const virtualStart,const BYTE * const dictEnd)908*2aa14b1aSNick Terrell size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
909*2aa14b1aSNick Terrell     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
910*2aa14b1aSNick Terrell     const BYTE** litPtr, const BYTE* const litLimit,
911*2aa14b1aSNick Terrell     const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
912*2aa14b1aSNick Terrell {
913*2aa14b1aSNick Terrell     BYTE* const oLitEnd = op + sequence.litLength;
914*2aa14b1aSNick Terrell     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
915*2aa14b1aSNick Terrell     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
916*2aa14b1aSNick Terrell     const BYTE* match = oLitEnd - sequence.offset;
917*2aa14b1aSNick Terrell 
918*2aa14b1aSNick Terrell 
919*2aa14b1aSNick Terrell     /* bounds checks : careful of address space overflow in 32-bit mode */
920*2aa14b1aSNick Terrell     RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
921*2aa14b1aSNick Terrell     RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
922*2aa14b1aSNick Terrell     assert(op < op + sequenceLength);
923*2aa14b1aSNick Terrell     assert(oLitEnd < op + sequenceLength);
924*2aa14b1aSNick Terrell 
925*2aa14b1aSNick Terrell     /* copy literals */
926*2aa14b1aSNick Terrell     RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
927*2aa14b1aSNick Terrell     ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
928*2aa14b1aSNick Terrell     op = oLitEnd;
929*2aa14b1aSNick Terrell     *litPtr = iLitEnd;
930*2aa14b1aSNick Terrell 
931*2aa14b1aSNick Terrell     /* copy Match */
932*2aa14b1aSNick Terrell     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
933*2aa14b1aSNick Terrell         /* offset beyond prefix */
934*2aa14b1aSNick Terrell         RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
935*2aa14b1aSNick Terrell         match = dictEnd - (prefixStart - match);
936*2aa14b1aSNick Terrell         if (match + sequence.matchLength <= dictEnd) {
937*2aa14b1aSNick Terrell             ZSTD_memmove(oLitEnd, match, sequence.matchLength);
938*2aa14b1aSNick Terrell             return sequenceLength;
939*2aa14b1aSNick Terrell         }
940*2aa14b1aSNick Terrell         /* span extDict & currentPrefixSegment */
941*2aa14b1aSNick Terrell         {   size_t const length1 = dictEnd - match;
942*2aa14b1aSNick Terrell         ZSTD_memmove(oLitEnd, match, length1);
943*2aa14b1aSNick Terrell         op = oLitEnd + length1;
944*2aa14b1aSNick Terrell         sequence.matchLength -= length1;
945*2aa14b1aSNick Terrell         match = prefixStart;
946*2aa14b1aSNick Terrell         }
947*2aa14b1aSNick Terrell     }
948e0c1b49fSNick Terrell     ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
949e0c1b49fSNick Terrell     return sequenceLength;
950e0c1b49fSNick Terrell }
951e0c1b49fSNick Terrell 
952e0c1b49fSNick Terrell HINT_INLINE
ZSTD_execSequence(BYTE * op,BYTE * const oend,seq_t sequence,const BYTE ** litPtr,const BYTE * const litLimit,const BYTE * const prefixStart,const BYTE * const virtualStart,const BYTE * const dictEnd)953e0c1b49fSNick Terrell size_t ZSTD_execSequence(BYTE* op,
954e0c1b49fSNick Terrell     BYTE* const oend, seq_t sequence,
955e0c1b49fSNick Terrell     const BYTE** litPtr, const BYTE* const litLimit,
956e0c1b49fSNick Terrell     const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
957e0c1b49fSNick Terrell {
958e0c1b49fSNick Terrell     BYTE* const oLitEnd = op + sequence.litLength;
959e0c1b49fSNick Terrell     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
960e0c1b49fSNick Terrell     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
961e0c1b49fSNick Terrell     BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
962e0c1b49fSNick Terrell     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
963e0c1b49fSNick Terrell     const BYTE* match = oLitEnd - sequence.offset;
964e0c1b49fSNick Terrell 
965e0c1b49fSNick Terrell     assert(op != NULL /* Precondition */);
966e0c1b49fSNick Terrell     assert(oend_w < oend /* No underflow */);
967e0c1b49fSNick Terrell     /* Handle edge cases in a slow path:
968e0c1b49fSNick Terrell      *   - Read beyond end of literals
969e0c1b49fSNick Terrell      *   - Match end is within WILDCOPY_OVERLIMIT of oend
970e0c1b49fSNick Terrell      *   - 32-bit mode and the match length overflows
971e0c1b49fSNick Terrell      */
972e0c1b49fSNick Terrell     if (UNLIKELY(
973e0c1b49fSNick Terrell         iLitEnd > litLimit ||
974e0c1b49fSNick Terrell         oMatchEnd > oend_w ||
975e0c1b49fSNick Terrell         (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
976e0c1b49fSNick Terrell         return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
977e0c1b49fSNick Terrell 
978e0c1b49fSNick Terrell     /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
979e0c1b49fSNick Terrell     assert(op <= oLitEnd /* No overflow */);
980e0c1b49fSNick Terrell     assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
981e0c1b49fSNick Terrell     assert(oMatchEnd <= oend /* No underflow */);
982e0c1b49fSNick Terrell     assert(iLitEnd <= litLimit /* Literal length is in bounds */);
983e0c1b49fSNick Terrell     assert(oLitEnd <= oend_w /* Can wildcopy literals */);
984e0c1b49fSNick Terrell     assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
985e0c1b49fSNick Terrell 
986e0c1b49fSNick Terrell     /* Copy Literals:
987e0c1b49fSNick Terrell      * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
988e0c1b49fSNick Terrell      * We likely don't need the full 32-byte wildcopy.
989e0c1b49fSNick Terrell      */
990e0c1b49fSNick Terrell     assert(WILDCOPY_OVERLENGTH >= 16);
991e0c1b49fSNick Terrell     ZSTD_copy16(op, (*litPtr));
992e0c1b49fSNick Terrell     if (UNLIKELY(sequence.litLength > 16)) {
993e0c1b49fSNick Terrell         ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
994e0c1b49fSNick Terrell     }
995e0c1b49fSNick Terrell     op = oLitEnd;
996e0c1b49fSNick Terrell     *litPtr = iLitEnd;   /* update for next sequence */
997e0c1b49fSNick Terrell 
998e0c1b49fSNick Terrell     /* Copy Match */
999e0c1b49fSNick Terrell     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1000e0c1b49fSNick Terrell         /* offset beyond prefix -> go into extDict */
1001e0c1b49fSNick Terrell         RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1002e0c1b49fSNick Terrell         match = dictEnd + (match - prefixStart);
1003e0c1b49fSNick Terrell         if (match + sequence.matchLength <= dictEnd) {
1004e0c1b49fSNick Terrell             ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1005e0c1b49fSNick Terrell             return sequenceLength;
1006e0c1b49fSNick Terrell         }
1007e0c1b49fSNick Terrell         /* span extDict & currentPrefixSegment */
1008e0c1b49fSNick Terrell         {   size_t const length1 = dictEnd - match;
1009e0c1b49fSNick Terrell         ZSTD_memmove(oLitEnd, match, length1);
1010e0c1b49fSNick Terrell         op = oLitEnd + length1;
1011e0c1b49fSNick Terrell         sequence.matchLength -= length1;
1012e0c1b49fSNick Terrell         match = prefixStart;
1013*2aa14b1aSNick Terrell         }
1014*2aa14b1aSNick Terrell     }
1015*2aa14b1aSNick Terrell     /* Match within prefix of 1 or more bytes */
1016*2aa14b1aSNick Terrell     assert(op <= oMatchEnd);
1017*2aa14b1aSNick Terrell     assert(oMatchEnd <= oend_w);
1018*2aa14b1aSNick Terrell     assert(match >= prefixStart);
1019*2aa14b1aSNick Terrell     assert(sequence.matchLength >= 1);
1020*2aa14b1aSNick Terrell 
1021*2aa14b1aSNick Terrell     /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1022*2aa14b1aSNick Terrell      * without overlap checking.
1023*2aa14b1aSNick Terrell      */
1024*2aa14b1aSNick Terrell     if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1025*2aa14b1aSNick Terrell         /* We bet on a full wildcopy for matches, since we expect matches to be
1026*2aa14b1aSNick Terrell          * longer than literals (in general). In silesia, ~10% of matches are longer
1027*2aa14b1aSNick Terrell          * than 16 bytes.
1028*2aa14b1aSNick Terrell          */
1029*2aa14b1aSNick Terrell         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1030*2aa14b1aSNick Terrell         return sequenceLength;
1031*2aa14b1aSNick Terrell     }
1032*2aa14b1aSNick Terrell     assert(sequence.offset < WILDCOPY_VECLEN);
1033*2aa14b1aSNick Terrell 
1034*2aa14b1aSNick Terrell     /* Copy 8 bytes and spread the offset to be >= 8. */
1035*2aa14b1aSNick Terrell     ZSTD_overlapCopy8(&op, &match, sequence.offset);
1036*2aa14b1aSNick Terrell 
1037*2aa14b1aSNick Terrell     /* If the match length is > 8 bytes, then continue with the wildcopy. */
1038*2aa14b1aSNick Terrell     if (sequence.matchLength > 8) {
1039*2aa14b1aSNick Terrell         assert(op < oMatchEnd);
1040*2aa14b1aSNick Terrell         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
1041*2aa14b1aSNick Terrell     }
1042*2aa14b1aSNick Terrell     return sequenceLength;
1043*2aa14b1aSNick Terrell }
1044*2aa14b1aSNick Terrell 
1045*2aa14b1aSNick Terrell HINT_INLINE
ZSTD_execSequenceSplitLitBuffer(BYTE * op,BYTE * const oend,const BYTE * const oend_w,seq_t sequence,const BYTE ** litPtr,const BYTE * const litLimit,const BYTE * const prefixStart,const BYTE * const virtualStart,const BYTE * const dictEnd)1046*2aa14b1aSNick Terrell size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
1047*2aa14b1aSNick Terrell     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
1048*2aa14b1aSNick Terrell     const BYTE** litPtr, const BYTE* const litLimit,
1049*2aa14b1aSNick Terrell     const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1050*2aa14b1aSNick Terrell {
1051*2aa14b1aSNick Terrell     BYTE* const oLitEnd = op + sequence.litLength;
1052*2aa14b1aSNick Terrell     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1053*2aa14b1aSNick Terrell     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
1054*2aa14b1aSNick Terrell     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1055*2aa14b1aSNick Terrell     const BYTE* match = oLitEnd - sequence.offset;
1056*2aa14b1aSNick Terrell 
1057*2aa14b1aSNick Terrell     assert(op != NULL /* Precondition */);
1058*2aa14b1aSNick Terrell     assert(oend_w < oend /* No underflow */);
1059*2aa14b1aSNick Terrell     /* Handle edge cases in a slow path:
1060*2aa14b1aSNick Terrell      *   - Read beyond end of literals
1061*2aa14b1aSNick Terrell      *   - Match end is within WILDCOPY_OVERLIMIT of oend
1062*2aa14b1aSNick Terrell      *   - 32-bit mode and the match length overflows
1063*2aa14b1aSNick Terrell      */
1064*2aa14b1aSNick Terrell     if (UNLIKELY(
1065*2aa14b1aSNick Terrell             iLitEnd > litLimit ||
1066*2aa14b1aSNick Terrell             oMatchEnd > oend_w ||
1067*2aa14b1aSNick Terrell             (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1068*2aa14b1aSNick Terrell         return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1069*2aa14b1aSNick Terrell 
1070*2aa14b1aSNick Terrell     /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
1071*2aa14b1aSNick Terrell     assert(op <= oLitEnd /* No overflow */);
1072*2aa14b1aSNick Terrell     assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
1073*2aa14b1aSNick Terrell     assert(oMatchEnd <= oend /* No underflow */);
1074*2aa14b1aSNick Terrell     assert(iLitEnd <= litLimit /* Literal length is in bounds */);
1075*2aa14b1aSNick Terrell     assert(oLitEnd <= oend_w /* Can wildcopy literals */);
1076*2aa14b1aSNick Terrell     assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
1077*2aa14b1aSNick Terrell 
1078*2aa14b1aSNick Terrell     /* Copy Literals:
1079*2aa14b1aSNick Terrell      * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1080*2aa14b1aSNick Terrell      * We likely don't need the full 32-byte wildcopy.
1081*2aa14b1aSNick Terrell      */
1082*2aa14b1aSNick Terrell     assert(WILDCOPY_OVERLENGTH >= 16);
1083*2aa14b1aSNick Terrell     ZSTD_copy16(op, (*litPtr));
1084*2aa14b1aSNick Terrell     if (UNLIKELY(sequence.litLength > 16)) {
1085*2aa14b1aSNick Terrell         ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
1086*2aa14b1aSNick Terrell     }
1087*2aa14b1aSNick Terrell     op = oLitEnd;
1088*2aa14b1aSNick Terrell     *litPtr = iLitEnd;   /* update for next sequence */
1089*2aa14b1aSNick Terrell 
1090*2aa14b1aSNick Terrell     /* Copy Match */
1091*2aa14b1aSNick Terrell     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1092*2aa14b1aSNick Terrell         /* offset beyond prefix -> go into extDict */
1093*2aa14b1aSNick Terrell         RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1094*2aa14b1aSNick Terrell         match = dictEnd + (match - prefixStart);
1095*2aa14b1aSNick Terrell         if (match + sequence.matchLength <= dictEnd) {
1096*2aa14b1aSNick Terrell             ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1097*2aa14b1aSNick Terrell             return sequenceLength;
1098*2aa14b1aSNick Terrell         }
1099*2aa14b1aSNick Terrell         /* span extDict & currentPrefixSegment */
1100*2aa14b1aSNick Terrell         {   size_t const length1 = dictEnd - match;
1101*2aa14b1aSNick Terrell             ZSTD_memmove(oLitEnd, match, length1);
1102*2aa14b1aSNick Terrell             op = oLitEnd + length1;
1103*2aa14b1aSNick Terrell             sequence.matchLength -= length1;
1104*2aa14b1aSNick Terrell             match = prefixStart;
1105e0c1b49fSNick Terrell     }   }
1106e0c1b49fSNick Terrell     /* Match within prefix of 1 or more bytes */
1107e0c1b49fSNick Terrell     assert(op <= oMatchEnd);
1108e0c1b49fSNick Terrell     assert(oMatchEnd <= oend_w);
1109e0c1b49fSNick Terrell     assert(match >= prefixStart);
1110e0c1b49fSNick Terrell     assert(sequence.matchLength >= 1);
1111e0c1b49fSNick Terrell 
1112e0c1b49fSNick Terrell     /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1113e0c1b49fSNick Terrell      * without overlap checking.
1114e0c1b49fSNick Terrell      */
1115e0c1b49fSNick Terrell     if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1116e0c1b49fSNick Terrell         /* We bet on a full wildcopy for matches, since we expect matches to be
1117e0c1b49fSNick Terrell          * longer than literals (in general). In silesia, ~10% of matches are longer
1118e0c1b49fSNick Terrell          * than 16 bytes.
1119e0c1b49fSNick Terrell          */
1120e0c1b49fSNick Terrell         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1121e0c1b49fSNick Terrell         return sequenceLength;
1122e0c1b49fSNick Terrell     }
1123e0c1b49fSNick Terrell     assert(sequence.offset < WILDCOPY_VECLEN);
1124e0c1b49fSNick Terrell 
1125e0c1b49fSNick Terrell     /* Copy 8 bytes and spread the offset to be >= 8. */
1126e0c1b49fSNick Terrell     ZSTD_overlapCopy8(&op, &match, sequence.offset);
1127e0c1b49fSNick Terrell 
1128e0c1b49fSNick Terrell     /* If the match length is > 8 bytes, then continue with the wildcopy. */
1129e0c1b49fSNick Terrell     if (sequence.matchLength > 8) {
1130e0c1b49fSNick Terrell         assert(op < oMatchEnd);
1131e0c1b49fSNick Terrell         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
1132e0c1b49fSNick Terrell     }
1133e0c1b49fSNick Terrell     return sequenceLength;
1134e0c1b49fSNick Terrell }
1135e0c1b49fSNick Terrell 
1136*2aa14b1aSNick Terrell 
1137e0c1b49fSNick Terrell static void
ZSTD_initFseState(ZSTD_fseState * DStatePtr,BIT_DStream_t * bitD,const ZSTD_seqSymbol * dt)1138e0c1b49fSNick Terrell ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1139e0c1b49fSNick Terrell {
1140e0c1b49fSNick Terrell     const void* ptr = dt;
1141e0c1b49fSNick Terrell     const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1142e0c1b49fSNick Terrell     DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
1143e0c1b49fSNick Terrell     DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
1144e0c1b49fSNick Terrell                 (U32)DStatePtr->state, DTableH->tableLog);
1145e0c1b49fSNick Terrell     BIT_reloadDStream(bitD);
1146e0c1b49fSNick Terrell     DStatePtr->table = dt + 1;
1147e0c1b49fSNick Terrell }
1148e0c1b49fSNick Terrell 
1149e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE void
ZSTD_updateFseStateWithDInfo(ZSTD_fseState * DStatePtr,BIT_DStream_t * bitD,U16 nextState,U32 nbBits)1150*2aa14b1aSNick Terrell ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
1151e0c1b49fSNick Terrell {
1152e0c1b49fSNick Terrell     size_t const lowBits = BIT_readBits(bitD, nbBits);
1153*2aa14b1aSNick Terrell     DStatePtr->state = nextState + lowBits;
1154e0c1b49fSNick Terrell }
1155e0c1b49fSNick Terrell 
1156e0c1b49fSNick Terrell /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
1157e0c1b49fSNick Terrell  * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
1158e0c1b49fSNick Terrell  * bits before reloading. This value is the maximum number of bytes we read
1159e0c1b49fSNick Terrell  * after reloading when we are decoding long offsets.
1160e0c1b49fSNick Terrell  */
1161e0c1b49fSNick Terrell #define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
1162e0c1b49fSNick Terrell     (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
1163e0c1b49fSNick Terrell         ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
1164e0c1b49fSNick Terrell         : 0)
1165e0c1b49fSNick Terrell 
1166e0c1b49fSNick Terrell typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
1167e0c1b49fSNick Terrell 
1168e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t * seqState,const ZSTD_longOffset_e longOffsets)1169*2aa14b1aSNick Terrell ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1170e0c1b49fSNick Terrell {
1171e0c1b49fSNick Terrell     seq_t seq;
1172*2aa14b1aSNick Terrell     const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1173*2aa14b1aSNick Terrell     const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1174*2aa14b1aSNick Terrell     const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1175*2aa14b1aSNick Terrell     seq.matchLength = mlDInfo->baseValue;
1176*2aa14b1aSNick Terrell     seq.litLength = llDInfo->baseValue;
1177*2aa14b1aSNick Terrell     {   U32 const ofBase = ofDInfo->baseValue;
1178*2aa14b1aSNick Terrell         BYTE const llBits = llDInfo->nbAdditionalBits;
1179*2aa14b1aSNick Terrell         BYTE const mlBits = mlDInfo->nbAdditionalBits;
1180*2aa14b1aSNick Terrell         BYTE const ofBits = ofDInfo->nbAdditionalBits;
1181e0c1b49fSNick Terrell         BYTE const totalBits = llBits+mlBits+ofBits;
1182e0c1b49fSNick Terrell 
1183*2aa14b1aSNick Terrell         U16 const llNext = llDInfo->nextState;
1184*2aa14b1aSNick Terrell         U16 const mlNext = mlDInfo->nextState;
1185*2aa14b1aSNick Terrell         U16 const ofNext = ofDInfo->nextState;
1186*2aa14b1aSNick Terrell         U32 const llnbBits = llDInfo->nbBits;
1187*2aa14b1aSNick Terrell         U32 const mlnbBits = mlDInfo->nbBits;
1188*2aa14b1aSNick Terrell         U32 const ofnbBits = ofDInfo->nbBits;
1189*2aa14b1aSNick Terrell         /*
1190*2aa14b1aSNick Terrell          * As gcc has better branch and block analyzers, sometimes it is only
1191*2aa14b1aSNick Terrell          * valuable to mark likelyness for clang, it gives around 3-4% of
1192*2aa14b1aSNick Terrell          * performance.
1193*2aa14b1aSNick Terrell          */
1194*2aa14b1aSNick Terrell 
1195e0c1b49fSNick Terrell         /* sequence */
1196e0c1b49fSNick Terrell         {   size_t offset;
1197*2aa14b1aSNick Terrell     #if defined(__clang__)
1198*2aa14b1aSNick Terrell             if (LIKELY(ofBits > 1)) {
1199*2aa14b1aSNick Terrell     #else
1200e0c1b49fSNick Terrell             if (ofBits > 1) {
1201*2aa14b1aSNick Terrell     #endif
1202e0c1b49fSNick Terrell                 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1203e0c1b49fSNick Terrell                 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1204e0c1b49fSNick Terrell                 assert(ofBits <= MaxOff);
1205e0c1b49fSNick Terrell                 if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1206e0c1b49fSNick Terrell                     U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
1207e0c1b49fSNick Terrell                     offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1208e0c1b49fSNick Terrell                     BIT_reloadDStream(&seqState->DStream);
1209e0c1b49fSNick Terrell                     if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1210e0c1b49fSNick Terrell                     assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
1211e0c1b49fSNick Terrell                 } else {
1212e0c1b49fSNick Terrell                     offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
1213e0c1b49fSNick Terrell                     if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1214e0c1b49fSNick Terrell                 }
1215e0c1b49fSNick Terrell                 seqState->prevOffset[2] = seqState->prevOffset[1];
1216e0c1b49fSNick Terrell                 seqState->prevOffset[1] = seqState->prevOffset[0];
1217e0c1b49fSNick Terrell                 seqState->prevOffset[0] = offset;
1218e0c1b49fSNick Terrell             } else {
1219*2aa14b1aSNick Terrell                 U32 const ll0 = (llDInfo->baseValue == 0);
1220e0c1b49fSNick Terrell                 if (LIKELY((ofBits == 0))) {
1221*2aa14b1aSNick Terrell                     offset = seqState->prevOffset[ll0];
1222*2aa14b1aSNick Terrell                     seqState->prevOffset[1] = seqState->prevOffset[!ll0];
1223e0c1b49fSNick Terrell                     seqState->prevOffset[0] = offset;
1224e0c1b49fSNick Terrell                 } else {
1225e0c1b49fSNick Terrell                     offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
1226e0c1b49fSNick Terrell                     {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1227e0c1b49fSNick Terrell                         temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
1228e0c1b49fSNick Terrell                         if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1229e0c1b49fSNick Terrell                         seqState->prevOffset[1] = seqState->prevOffset[0];
1230e0c1b49fSNick Terrell                         seqState->prevOffset[0] = offset = temp;
1231e0c1b49fSNick Terrell             }   }   }
1232e0c1b49fSNick Terrell             seq.offset = offset;
1233e0c1b49fSNick Terrell         }
1234e0c1b49fSNick Terrell 
1235*2aa14b1aSNick Terrell     #if defined(__clang__)
1236*2aa14b1aSNick Terrell         if (UNLIKELY(mlBits > 0))
1237*2aa14b1aSNick Terrell     #else
1238e0c1b49fSNick Terrell         if (mlBits > 0)
1239*2aa14b1aSNick Terrell     #endif
1240e0c1b49fSNick Terrell             seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
1241e0c1b49fSNick Terrell 
1242e0c1b49fSNick Terrell         if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1243e0c1b49fSNick Terrell             BIT_reloadDStream(&seqState->DStream);
1244e0c1b49fSNick Terrell         if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1245e0c1b49fSNick Terrell             BIT_reloadDStream(&seqState->DStream);
1246e0c1b49fSNick Terrell         /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1247e0c1b49fSNick Terrell         ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1248e0c1b49fSNick Terrell 
1249*2aa14b1aSNick Terrell     #if defined(__clang__)
1250*2aa14b1aSNick Terrell         if (UNLIKELY(llBits > 0))
1251*2aa14b1aSNick Terrell     #else
1252e0c1b49fSNick Terrell         if (llBits > 0)
1253*2aa14b1aSNick Terrell     #endif
1254e0c1b49fSNick Terrell             seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
1255e0c1b49fSNick Terrell 
1256e0c1b49fSNick Terrell         if (MEM_32bits())
1257e0c1b49fSNick Terrell             BIT_reloadDStream(&seqState->DStream);
1258e0c1b49fSNick Terrell 
1259e0c1b49fSNick Terrell         DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
1260e0c1b49fSNick Terrell                     (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1261e0c1b49fSNick Terrell 
1262*2aa14b1aSNick Terrell         ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
1263*2aa14b1aSNick Terrell         ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
1264e0c1b49fSNick Terrell         if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
1265*2aa14b1aSNick Terrell         ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
1266e0c1b49fSNick Terrell     }
1267e0c1b49fSNick Terrell 
1268e0c1b49fSNick Terrell     return seq;
1269e0c1b49fSNick Terrell }
1270e0c1b49fSNick Terrell 
1271e0c1b49fSNick Terrell #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1272e0c1b49fSNick Terrell MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
1273e0c1b49fSNick Terrell {
1274e0c1b49fSNick Terrell     size_t const windowSize = dctx->fParams.windowSize;
1275e0c1b49fSNick Terrell     /* No dictionary used. */
1276e0c1b49fSNick Terrell     if (dctx->dictContentEndForFuzzing == NULL) return 0;
1277e0c1b49fSNick Terrell     /* Dictionary is our prefix. */
1278e0c1b49fSNick Terrell     if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
1279e0c1b49fSNick Terrell     /* Dictionary is not our ext-dict. */
1280e0c1b49fSNick Terrell     if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
1281e0c1b49fSNick Terrell     /* Dictionary is not within our window size. */
1282e0c1b49fSNick Terrell     if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
1283e0c1b49fSNick Terrell     /* Dictionary is active. */
1284e0c1b49fSNick Terrell     return 1;
1285e0c1b49fSNick Terrell }
1286e0c1b49fSNick Terrell 
1287e0c1b49fSNick Terrell MEM_STATIC void ZSTD_assertValidSequence(
1288e0c1b49fSNick Terrell         ZSTD_DCtx const* dctx,
1289e0c1b49fSNick Terrell         BYTE const* op, BYTE const* oend,
1290e0c1b49fSNick Terrell         seq_t const seq,
1291e0c1b49fSNick Terrell         BYTE const* prefixStart, BYTE const* virtualStart)
1292e0c1b49fSNick Terrell {
1293e0c1b49fSNick Terrell #if DEBUGLEVEL >= 1
1294e0c1b49fSNick Terrell     size_t const windowSize = dctx->fParams.windowSize;
1295e0c1b49fSNick Terrell     size_t const sequenceSize = seq.litLength + seq.matchLength;
1296e0c1b49fSNick Terrell     BYTE const* const oLitEnd = op + seq.litLength;
1297e0c1b49fSNick Terrell     DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
1298e0c1b49fSNick Terrell             (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1299e0c1b49fSNick Terrell     assert(op <= oend);
1300e0c1b49fSNick Terrell     assert((size_t)(oend - op) >= sequenceSize);
1301e0c1b49fSNick Terrell     assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
1302e0c1b49fSNick Terrell     if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
1303e0c1b49fSNick Terrell         size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
1304e0c1b49fSNick Terrell         /* Offset must be within the dictionary. */
1305e0c1b49fSNick Terrell         assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
1306e0c1b49fSNick Terrell         assert(seq.offset <= windowSize + dictSize);
1307e0c1b49fSNick Terrell     } else {
1308e0c1b49fSNick Terrell         /* Offset must be within our window. */
1309e0c1b49fSNick Terrell         assert(seq.offset <= windowSize);
1310e0c1b49fSNick Terrell     }
1311e0c1b49fSNick Terrell #else
1312e0c1b49fSNick Terrell     (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1313e0c1b49fSNick Terrell #endif
1314e0c1b49fSNick Terrell }
1315e0c1b49fSNick Terrell #endif
1316e0c1b49fSNick Terrell 
1317e0c1b49fSNick Terrell #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1318*2aa14b1aSNick Terrell 
1319*2aa14b1aSNick Terrell 
1320e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t
1321e0c1b49fSNick Terrell DONT_VECTORIZE
1322*2aa14b1aSNick Terrell ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
1323e0c1b49fSNick Terrell                                void* dst, size_t maxDstSize,
1324e0c1b49fSNick Terrell                          const void* seqStart, size_t seqSize, int nbSeq,
1325e0c1b49fSNick Terrell                          const ZSTD_longOffset_e isLongOffset,
1326e0c1b49fSNick Terrell                          const int frame)
1327e0c1b49fSNick Terrell {
1328e0c1b49fSNick Terrell     const BYTE* ip = (const BYTE*)seqStart;
1329e0c1b49fSNick Terrell     const BYTE* const iend = ip + seqSize;
1330e0c1b49fSNick Terrell     BYTE* const ostart = (BYTE*)dst;
1331e0c1b49fSNick Terrell     BYTE* const oend = ostart + maxDstSize;
1332e0c1b49fSNick Terrell     BYTE* op = ostart;
1333e0c1b49fSNick Terrell     const BYTE* litPtr = dctx->litPtr;
1334*2aa14b1aSNick Terrell     const BYTE* litBufferEnd = dctx->litBufferEnd;
1335e0c1b49fSNick Terrell     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1336e0c1b49fSNick Terrell     const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1337e0c1b49fSNick Terrell     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1338*2aa14b1aSNick Terrell     DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
1339e0c1b49fSNick Terrell     (void)frame;
1340e0c1b49fSNick Terrell 
1341e0c1b49fSNick Terrell     /* Regen sequences */
1342e0c1b49fSNick Terrell     if (nbSeq) {
1343e0c1b49fSNick Terrell         seqState_t seqState;
1344e0c1b49fSNick Terrell         dctx->fseEntropy = 1;
1345e0c1b49fSNick Terrell         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1346e0c1b49fSNick Terrell         RETURN_ERROR_IF(
1347e0c1b49fSNick Terrell             ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1348e0c1b49fSNick Terrell             corruption_detected, "");
1349e0c1b49fSNick Terrell         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1350e0c1b49fSNick Terrell         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1351e0c1b49fSNick Terrell         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1352e0c1b49fSNick Terrell         assert(dst != NULL);
1353e0c1b49fSNick Terrell 
1354e0c1b49fSNick Terrell         ZSTD_STATIC_ASSERT(
1355e0c1b49fSNick Terrell                 BIT_DStream_unfinished < BIT_DStream_completed &&
1356e0c1b49fSNick Terrell                 BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1357e0c1b49fSNick Terrell                 BIT_DStream_completed < BIT_DStream_overflow);
1358e0c1b49fSNick Terrell 
1359*2aa14b1aSNick Terrell         /* decompress without overrunning litPtr begins */
1360*2aa14b1aSNick Terrell         {
1361*2aa14b1aSNick Terrell             seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1362e0c1b49fSNick Terrell             /* Align the decompression loop to 32 + 16 bytes.
1363e0c1b49fSNick Terrell                 *
1364e0c1b49fSNick Terrell                 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1365e0c1b49fSNick Terrell                 * speed swings based on the alignment of the decompression loop. This
1366e0c1b49fSNick Terrell                 * performance swing is caused by parts of the decompression loop falling
1367e0c1b49fSNick Terrell                 * out of the DSB. The entire decompression loop should fit in the DSB,
1368e0c1b49fSNick Terrell                 * when it can't we get much worse performance. You can measure if you've
1369e0c1b49fSNick Terrell                 * hit the good case or the bad case with this perf command for some
1370e0c1b49fSNick Terrell                 * compressed file test.zst:
1371e0c1b49fSNick Terrell                 *
1372e0c1b49fSNick Terrell                 *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1373e0c1b49fSNick Terrell                 *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1374e0c1b49fSNick Terrell                 *
1375e0c1b49fSNick Terrell                 * If you see most cycles served out of the MITE you've hit the bad case.
1376e0c1b49fSNick Terrell                 * If you see most cycles served out of the DSB you've hit the good case.
1377e0c1b49fSNick Terrell                 * If it is pretty even then you may be in an okay case.
1378e0c1b49fSNick Terrell                 *
1379*2aa14b1aSNick Terrell                 * This issue has been reproduced on the following CPUs:
1380e0c1b49fSNick Terrell                 *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1381e0c1b49fSNick Terrell                 *               Use Instruments->Counters to get DSB/MITE cycles.
1382e0c1b49fSNick Terrell                 *               I never got performance swings, but I was able to
1383e0c1b49fSNick Terrell                 *               go from the good case of mostly DSB to half of the
1384e0c1b49fSNick Terrell                 *               cycles served from MITE.
1385e0c1b49fSNick Terrell                 *   - Coffeelake: Intel i9-9900k
1386*2aa14b1aSNick Terrell                 *   - Coffeelake: Intel i7-9700k
1387e0c1b49fSNick Terrell                 *
1388e0c1b49fSNick Terrell                 * I haven't been able to reproduce the instability or DSB misses on any
1389e0c1b49fSNick Terrell                 * of the following CPUS:
1390e0c1b49fSNick Terrell                 *   - Haswell
1391e0c1b49fSNick Terrell                 *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
1392e0c1b49fSNick Terrell                 *   - Skylake
1393e0c1b49fSNick Terrell                 *
1394*2aa14b1aSNick Terrell                 * Alignment is done for each of the three major decompression loops:
1395*2aa14b1aSNick Terrell                 *   - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
1396*2aa14b1aSNick Terrell                 *   - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
1397*2aa14b1aSNick Terrell                 *   - ZSTD_decompressSequences_body
1398*2aa14b1aSNick Terrell                 * Alignment choices are made to minimize large swings on bad cases and influence on performance
1399*2aa14b1aSNick Terrell                 * from changes external to this code, rather than to overoptimize on the current commit.
1400*2aa14b1aSNick Terrell                 *
1401e0c1b49fSNick Terrell                 * If you are seeing performance stability this script can help test.
1402e0c1b49fSNick Terrell                 * It tests on 4 commits in zstd where I saw performance change.
1403e0c1b49fSNick Terrell                 *
1404e0c1b49fSNick Terrell                 *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1405e0c1b49fSNick Terrell                 */
1406*2aa14b1aSNick Terrell #if defined(__x86_64__)
1407*2aa14b1aSNick Terrell             __asm__(".p2align 6");
1408*2aa14b1aSNick Terrell #  if __GNUC__ >= 7
1409*2aa14b1aSNick Terrell 	    /* good for gcc-7, gcc-9, and gcc-11 */
1410*2aa14b1aSNick Terrell             __asm__("nop");
1411e0c1b49fSNick Terrell             __asm__(".p2align 5");
1412e0c1b49fSNick Terrell             __asm__("nop");
1413e0c1b49fSNick Terrell             __asm__(".p2align 4");
1414*2aa14b1aSNick Terrell #    if __GNUC__ == 8 || __GNUC__ == 10
1415*2aa14b1aSNick Terrell 	    /* good for gcc-8 and gcc-10 */
1416*2aa14b1aSNick Terrell             __asm__("nop");
1417*2aa14b1aSNick Terrell             __asm__(".p2align 3");
1418e0c1b49fSNick Terrell #    endif
1419*2aa14b1aSNick Terrell #  endif
1420*2aa14b1aSNick Terrell #endif
1421*2aa14b1aSNick Terrell 
1422*2aa14b1aSNick Terrell             /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
1423*2aa14b1aSNick Terrell             for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
1424*2aa14b1aSNick Terrell                 size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1425*2aa14b1aSNick Terrell #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1426*2aa14b1aSNick Terrell                 assert(!ZSTD_isError(oneSeqSize));
1427*2aa14b1aSNick Terrell                 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1428*2aa14b1aSNick Terrell #endif
1429*2aa14b1aSNick Terrell                 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1430*2aa14b1aSNick Terrell                     return oneSeqSize;
1431*2aa14b1aSNick Terrell                 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1432*2aa14b1aSNick Terrell                 op += oneSeqSize;
1433*2aa14b1aSNick Terrell                 if (UNLIKELY(!--nbSeq))
1434*2aa14b1aSNick Terrell                     break;
1435*2aa14b1aSNick Terrell                 BIT_reloadDStream(&(seqState.DStream));
1436*2aa14b1aSNick Terrell                 sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1437*2aa14b1aSNick Terrell             }
1438*2aa14b1aSNick Terrell 
1439*2aa14b1aSNick Terrell             /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
1440*2aa14b1aSNick Terrell             if (nbSeq > 0) {
1441*2aa14b1aSNick Terrell                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1442*2aa14b1aSNick Terrell                 if (leftoverLit)
1443*2aa14b1aSNick Terrell                 {
1444*2aa14b1aSNick Terrell                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1445*2aa14b1aSNick Terrell                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1446*2aa14b1aSNick Terrell                     sequence.litLength -= leftoverLit;
1447*2aa14b1aSNick Terrell                     op += leftoverLit;
1448*2aa14b1aSNick Terrell                 }
1449*2aa14b1aSNick Terrell                 litPtr = dctx->litExtraBuffer;
1450*2aa14b1aSNick Terrell                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1451*2aa14b1aSNick Terrell                 dctx->litBufferLocation = ZSTD_not_in_dst;
1452*2aa14b1aSNick Terrell                 {
1453*2aa14b1aSNick Terrell                     size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1454*2aa14b1aSNick Terrell #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1455*2aa14b1aSNick Terrell                     assert(!ZSTD_isError(oneSeqSize));
1456*2aa14b1aSNick Terrell                     if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1457*2aa14b1aSNick Terrell #endif
1458*2aa14b1aSNick Terrell                     if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1459*2aa14b1aSNick Terrell                         return oneSeqSize;
1460*2aa14b1aSNick Terrell                     DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1461*2aa14b1aSNick Terrell                     op += oneSeqSize;
1462*2aa14b1aSNick Terrell                     if (--nbSeq)
1463*2aa14b1aSNick Terrell                         BIT_reloadDStream(&(seqState.DStream));
1464*2aa14b1aSNick Terrell                 }
1465*2aa14b1aSNick Terrell             }
1466*2aa14b1aSNick Terrell         }
1467*2aa14b1aSNick Terrell 
1468*2aa14b1aSNick Terrell         if (nbSeq > 0) /* there is remaining lit from extra buffer */
1469*2aa14b1aSNick Terrell         {
1470*2aa14b1aSNick Terrell 
1471*2aa14b1aSNick Terrell #if defined(__x86_64__)
1472*2aa14b1aSNick Terrell             __asm__(".p2align 6");
1473*2aa14b1aSNick Terrell             __asm__("nop");
1474*2aa14b1aSNick Terrell #  if __GNUC__ != 7
1475*2aa14b1aSNick Terrell             /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
1476*2aa14b1aSNick Terrell             __asm__(".p2align 4");
1477*2aa14b1aSNick Terrell             __asm__("nop");
1478*2aa14b1aSNick Terrell             __asm__(".p2align 3");
1479*2aa14b1aSNick Terrell #  elif __GNUC__ >= 11
1480*2aa14b1aSNick Terrell             __asm__(".p2align 3");
1481*2aa14b1aSNick Terrell #  else
1482*2aa14b1aSNick Terrell             __asm__(".p2align 5");
1483*2aa14b1aSNick Terrell             __asm__("nop");
1484*2aa14b1aSNick Terrell             __asm__(".p2align 3");
1485*2aa14b1aSNick Terrell #  endif
1486*2aa14b1aSNick Terrell #endif
1487*2aa14b1aSNick Terrell 
1488e0c1b49fSNick Terrell             for (; ; ) {
1489*2aa14b1aSNick Terrell                 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1490*2aa14b1aSNick Terrell                 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1491*2aa14b1aSNick Terrell #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1492*2aa14b1aSNick Terrell                 assert(!ZSTD_isError(oneSeqSize));
1493*2aa14b1aSNick Terrell                 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1494*2aa14b1aSNick Terrell #endif
1495*2aa14b1aSNick Terrell                 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1496*2aa14b1aSNick Terrell                     return oneSeqSize;
1497*2aa14b1aSNick Terrell                 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1498*2aa14b1aSNick Terrell                 op += oneSeqSize;
1499*2aa14b1aSNick Terrell                 if (UNLIKELY(!--nbSeq))
1500*2aa14b1aSNick Terrell                     break;
1501*2aa14b1aSNick Terrell                 BIT_reloadDStream(&(seqState.DStream));
1502*2aa14b1aSNick Terrell             }
1503*2aa14b1aSNick Terrell         }
1504*2aa14b1aSNick Terrell 
1505*2aa14b1aSNick Terrell         /* check if reached exact end */
1506*2aa14b1aSNick Terrell         DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
1507*2aa14b1aSNick Terrell         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1508*2aa14b1aSNick Terrell         RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1509*2aa14b1aSNick Terrell         /* save reps for next block */
1510*2aa14b1aSNick Terrell         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1511*2aa14b1aSNick Terrell     }
1512*2aa14b1aSNick Terrell 
1513*2aa14b1aSNick Terrell     /* last literal segment */
1514*2aa14b1aSNick Terrell     if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
1515*2aa14b1aSNick Terrell     {
1516*2aa14b1aSNick Terrell         size_t const lastLLSize = litBufferEnd - litPtr;
1517*2aa14b1aSNick Terrell         RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1518*2aa14b1aSNick Terrell         if (op != NULL) {
1519*2aa14b1aSNick Terrell             ZSTD_memmove(op, litPtr, lastLLSize);
1520*2aa14b1aSNick Terrell             op += lastLLSize;
1521*2aa14b1aSNick Terrell         }
1522*2aa14b1aSNick Terrell         litPtr = dctx->litExtraBuffer;
1523*2aa14b1aSNick Terrell         litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1524*2aa14b1aSNick Terrell         dctx->litBufferLocation = ZSTD_not_in_dst;
1525*2aa14b1aSNick Terrell     }
1526*2aa14b1aSNick Terrell     {   size_t const lastLLSize = litBufferEnd - litPtr;
1527*2aa14b1aSNick Terrell         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1528*2aa14b1aSNick Terrell         if (op != NULL) {
1529*2aa14b1aSNick Terrell             ZSTD_memcpy(op, litPtr, lastLLSize);
1530*2aa14b1aSNick Terrell             op += lastLLSize;
1531*2aa14b1aSNick Terrell         }
1532*2aa14b1aSNick Terrell     }
1533*2aa14b1aSNick Terrell 
1534*2aa14b1aSNick Terrell     return op-ostart;
1535*2aa14b1aSNick Terrell }
1536*2aa14b1aSNick Terrell 
1537*2aa14b1aSNick Terrell FORCE_INLINE_TEMPLATE size_t
1538*2aa14b1aSNick Terrell DONT_VECTORIZE
1539*2aa14b1aSNick Terrell ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1540*2aa14b1aSNick Terrell     void* dst, size_t maxDstSize,
1541*2aa14b1aSNick Terrell     const void* seqStart, size_t seqSize, int nbSeq,
1542*2aa14b1aSNick Terrell     const ZSTD_longOffset_e isLongOffset,
1543*2aa14b1aSNick Terrell     const int frame)
1544*2aa14b1aSNick Terrell {
1545*2aa14b1aSNick Terrell     const BYTE* ip = (const BYTE*)seqStart;
1546*2aa14b1aSNick Terrell     const BYTE* const iend = ip + seqSize;
1547*2aa14b1aSNick Terrell     BYTE* const ostart = (BYTE*)dst;
1548*2aa14b1aSNick Terrell     BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
1549*2aa14b1aSNick Terrell     BYTE* op = ostart;
1550*2aa14b1aSNick Terrell     const BYTE* litPtr = dctx->litPtr;
1551*2aa14b1aSNick Terrell     const BYTE* const litEnd = litPtr + dctx->litSize;
1552*2aa14b1aSNick Terrell     const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1553*2aa14b1aSNick Terrell     const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1554*2aa14b1aSNick Terrell     const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1555*2aa14b1aSNick Terrell     DEBUGLOG(5, "ZSTD_decompressSequences_body");
1556*2aa14b1aSNick Terrell     (void)frame;
1557*2aa14b1aSNick Terrell 
1558*2aa14b1aSNick Terrell     /* Regen sequences */
1559*2aa14b1aSNick Terrell     if (nbSeq) {
1560*2aa14b1aSNick Terrell         seqState_t seqState;
1561*2aa14b1aSNick Terrell         dctx->fseEntropy = 1;
1562*2aa14b1aSNick Terrell         { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1563*2aa14b1aSNick Terrell         RETURN_ERROR_IF(
1564*2aa14b1aSNick Terrell             ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1565*2aa14b1aSNick Terrell             corruption_detected, "");
1566*2aa14b1aSNick Terrell         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1567*2aa14b1aSNick Terrell         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1568*2aa14b1aSNick Terrell         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1569*2aa14b1aSNick Terrell         assert(dst != NULL);
1570*2aa14b1aSNick Terrell 
1571*2aa14b1aSNick Terrell         ZSTD_STATIC_ASSERT(
1572*2aa14b1aSNick Terrell             BIT_DStream_unfinished < BIT_DStream_completed &&
1573*2aa14b1aSNick Terrell             BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1574*2aa14b1aSNick Terrell             BIT_DStream_completed < BIT_DStream_overflow);
1575*2aa14b1aSNick Terrell 
1576*2aa14b1aSNick Terrell #if defined(__x86_64__)
1577*2aa14b1aSNick Terrell             __asm__(".p2align 6");
1578*2aa14b1aSNick Terrell             __asm__("nop");
1579*2aa14b1aSNick Terrell #  if __GNUC__ >= 7
1580*2aa14b1aSNick Terrell             __asm__(".p2align 5");
1581*2aa14b1aSNick Terrell             __asm__("nop");
1582*2aa14b1aSNick Terrell             __asm__(".p2align 3");
1583*2aa14b1aSNick Terrell #  else
1584*2aa14b1aSNick Terrell             __asm__(".p2align 4");
1585*2aa14b1aSNick Terrell             __asm__("nop");
1586*2aa14b1aSNick Terrell             __asm__(".p2align 3");
1587*2aa14b1aSNick Terrell #  endif
1588*2aa14b1aSNick Terrell #endif
1589*2aa14b1aSNick Terrell 
1590*2aa14b1aSNick Terrell         for ( ; ; ) {
1591*2aa14b1aSNick Terrell             seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1592e0c1b49fSNick Terrell             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1593e0c1b49fSNick Terrell #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1594e0c1b49fSNick Terrell             assert(!ZSTD_isError(oneSeqSize));
1595e0c1b49fSNick Terrell             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1596e0c1b49fSNick Terrell #endif
1597*2aa14b1aSNick Terrell             if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1598*2aa14b1aSNick Terrell                 return oneSeqSize;
1599e0c1b49fSNick Terrell             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1600e0c1b49fSNick Terrell             op += oneSeqSize;
1601*2aa14b1aSNick Terrell             if (UNLIKELY(!--nbSeq))
1602e0c1b49fSNick Terrell                 break;
1603*2aa14b1aSNick Terrell             BIT_reloadDStream(&(seqState.DStream));
1604e0c1b49fSNick Terrell         }
1605e0c1b49fSNick Terrell 
1606e0c1b49fSNick Terrell         /* check if reached exact end */
1607e0c1b49fSNick Terrell         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
1608e0c1b49fSNick Terrell         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1609e0c1b49fSNick Terrell         RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1610e0c1b49fSNick Terrell         /* save reps for next block */
1611e0c1b49fSNick Terrell         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1612e0c1b49fSNick Terrell     }
1613e0c1b49fSNick Terrell 
1614e0c1b49fSNick Terrell     /* last literal segment */
1615e0c1b49fSNick Terrell     {   size_t const lastLLSize = litEnd - litPtr;
1616e0c1b49fSNick Terrell         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1617e0c1b49fSNick Terrell         if (op != NULL) {
1618e0c1b49fSNick Terrell             ZSTD_memcpy(op, litPtr, lastLLSize);
1619e0c1b49fSNick Terrell             op += lastLLSize;
1620e0c1b49fSNick Terrell         }
1621e0c1b49fSNick Terrell     }
1622e0c1b49fSNick Terrell 
1623e0c1b49fSNick Terrell     return op-ostart;
1624e0c1b49fSNick Terrell }
1625e0c1b49fSNick Terrell 
1626e0c1b49fSNick Terrell static size_t
1627e0c1b49fSNick Terrell ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1628e0c1b49fSNick Terrell                                  void* dst, size_t maxDstSize,
1629e0c1b49fSNick Terrell                            const void* seqStart, size_t seqSize, int nbSeq,
1630e0c1b49fSNick Terrell                            const ZSTD_longOffset_e isLongOffset,
1631e0c1b49fSNick Terrell                            const int frame)
1632e0c1b49fSNick Terrell {
1633e0c1b49fSNick Terrell     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1634e0c1b49fSNick Terrell }
1635*2aa14b1aSNick Terrell 
1636*2aa14b1aSNick Terrell static size_t
1637*2aa14b1aSNick Terrell ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1638*2aa14b1aSNick Terrell                                                void* dst, size_t maxDstSize,
1639*2aa14b1aSNick Terrell                                          const void* seqStart, size_t seqSize, int nbSeq,
1640*2aa14b1aSNick Terrell                                          const ZSTD_longOffset_e isLongOffset,
1641*2aa14b1aSNick Terrell                                          const int frame)
1642*2aa14b1aSNick Terrell {
1643*2aa14b1aSNick Terrell     return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1644*2aa14b1aSNick Terrell }
1645e0c1b49fSNick Terrell #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1646e0c1b49fSNick Terrell 
1647e0c1b49fSNick Terrell #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1648*2aa14b1aSNick Terrell 
1649*2aa14b1aSNick Terrell FORCE_INLINE_TEMPLATE size_t
1650*2aa14b1aSNick Terrell ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1651*2aa14b1aSNick Terrell                    const BYTE* const prefixStart, const BYTE* const dictEnd)
1652*2aa14b1aSNick Terrell {
1653*2aa14b1aSNick Terrell     prefetchPos += sequence.litLength;
1654*2aa14b1aSNick Terrell     {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
1655*2aa14b1aSNick Terrell         const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1656*2aa14b1aSNick Terrell                                                                               * No consequence though : memory address is only used for prefetching, not for dereferencing */
1657*2aa14b1aSNick Terrell         PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1658*2aa14b1aSNick Terrell     }
1659*2aa14b1aSNick Terrell     return prefetchPos + sequence.matchLength;
1660*2aa14b1aSNick Terrell }
1661*2aa14b1aSNick Terrell 
1662*2aa14b1aSNick Terrell /* This decoding function employs prefetching
1663*2aa14b1aSNick Terrell  * to reduce latency impact of cache misses.
1664*2aa14b1aSNick Terrell  * It's generally employed when block contains a significant portion of long-distance matches
1665*2aa14b1aSNick Terrell  * or when coupled with a "cold" dictionary */
1666e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t
1667e0c1b49fSNick Terrell ZSTD_decompressSequencesLong_body(
1668e0c1b49fSNick Terrell                                ZSTD_DCtx* dctx,
1669e0c1b49fSNick Terrell                                void* dst, size_t maxDstSize,
1670e0c1b49fSNick Terrell                          const void* seqStart, size_t seqSize, int nbSeq,
1671e0c1b49fSNick Terrell                          const ZSTD_longOffset_e isLongOffset,
1672e0c1b49fSNick Terrell                          const int frame)
1673e0c1b49fSNick Terrell {
1674e0c1b49fSNick Terrell     const BYTE* ip = (const BYTE*)seqStart;
1675e0c1b49fSNick Terrell     const BYTE* const iend = ip + seqSize;
1676e0c1b49fSNick Terrell     BYTE* const ostart = (BYTE*)dst;
1677*2aa14b1aSNick Terrell     BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
1678e0c1b49fSNick Terrell     BYTE* op = ostart;
1679e0c1b49fSNick Terrell     const BYTE* litPtr = dctx->litPtr;
1680*2aa14b1aSNick Terrell     const BYTE* litBufferEnd = dctx->litBufferEnd;
1681e0c1b49fSNick Terrell     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1682e0c1b49fSNick Terrell     const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1683e0c1b49fSNick Terrell     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1684e0c1b49fSNick Terrell     (void)frame;
1685e0c1b49fSNick Terrell 
1686e0c1b49fSNick Terrell     /* Regen sequences */
1687e0c1b49fSNick Terrell     if (nbSeq) {
1688*2aa14b1aSNick Terrell #define STORED_SEQS 8
1689e0c1b49fSNick Terrell #define STORED_SEQS_MASK (STORED_SEQS-1)
1690*2aa14b1aSNick Terrell #define ADVANCED_SEQS STORED_SEQS
1691e0c1b49fSNick Terrell         seq_t sequences[STORED_SEQS];
1692e0c1b49fSNick Terrell         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1693e0c1b49fSNick Terrell         seqState_t seqState;
1694e0c1b49fSNick Terrell         int seqNb;
1695*2aa14b1aSNick Terrell         size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
1696*2aa14b1aSNick Terrell 
1697e0c1b49fSNick Terrell         dctx->fseEntropy = 1;
1698e0c1b49fSNick Terrell         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1699e0c1b49fSNick Terrell         assert(dst != NULL);
1700e0c1b49fSNick Terrell         assert(iend >= ip);
1701e0c1b49fSNick Terrell         RETURN_ERROR_IF(
1702e0c1b49fSNick Terrell             ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1703e0c1b49fSNick Terrell             corruption_detected, "");
1704e0c1b49fSNick Terrell         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1705e0c1b49fSNick Terrell         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1706e0c1b49fSNick Terrell         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1707e0c1b49fSNick Terrell 
1708e0c1b49fSNick Terrell         /* prepare in advance */
1709e0c1b49fSNick Terrell         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1710*2aa14b1aSNick Terrell             seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1711*2aa14b1aSNick Terrell             prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1712*2aa14b1aSNick Terrell             sequences[seqNb] = sequence;
1713e0c1b49fSNick Terrell         }
1714e0c1b49fSNick Terrell         RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
1715e0c1b49fSNick Terrell 
1716*2aa14b1aSNick Terrell         /* decompress without stomping litBuffer */
1717e0c1b49fSNick Terrell         for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
1718*2aa14b1aSNick Terrell             seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1719*2aa14b1aSNick Terrell             size_t oneSeqSize;
1720*2aa14b1aSNick Terrell 
1721*2aa14b1aSNick Terrell             if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
1722*2aa14b1aSNick Terrell             {
1723*2aa14b1aSNick Terrell                 /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
1724*2aa14b1aSNick Terrell                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1725*2aa14b1aSNick Terrell                 if (leftoverLit)
1726*2aa14b1aSNick Terrell                 {
1727*2aa14b1aSNick Terrell                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1728*2aa14b1aSNick Terrell                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1729*2aa14b1aSNick Terrell                     sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1730*2aa14b1aSNick Terrell                     op += leftoverLit;
1731*2aa14b1aSNick Terrell                 }
1732*2aa14b1aSNick Terrell                 litPtr = dctx->litExtraBuffer;
1733*2aa14b1aSNick Terrell                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1734*2aa14b1aSNick Terrell                 dctx->litBufferLocation = ZSTD_not_in_dst;
1735*2aa14b1aSNick Terrell                 oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1736e0c1b49fSNick Terrell #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1737e0c1b49fSNick Terrell                 assert(!ZSTD_isError(oneSeqSize));
1738e0c1b49fSNick Terrell                 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1739e0c1b49fSNick Terrell #endif
1740e0c1b49fSNick Terrell                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1741*2aa14b1aSNick Terrell 
1742*2aa14b1aSNick Terrell                 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1743e0c1b49fSNick Terrell                 sequences[seqNb & STORED_SEQS_MASK] = sequence;
1744e0c1b49fSNick Terrell                 op += oneSeqSize;
1745e0c1b49fSNick Terrell             }
1746*2aa14b1aSNick Terrell             else
1747*2aa14b1aSNick Terrell             {
1748*2aa14b1aSNick Terrell                 /* lit buffer is either wholly contained in first or second split, or not split at all*/
1749*2aa14b1aSNick Terrell                 oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1750*2aa14b1aSNick Terrell                     ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1751*2aa14b1aSNick Terrell                     ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1752*2aa14b1aSNick Terrell #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1753*2aa14b1aSNick Terrell                 assert(!ZSTD_isError(oneSeqSize));
1754*2aa14b1aSNick Terrell                 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1755*2aa14b1aSNick Terrell #endif
1756*2aa14b1aSNick Terrell                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1757*2aa14b1aSNick Terrell 
1758*2aa14b1aSNick Terrell                 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1759*2aa14b1aSNick Terrell                 sequences[seqNb & STORED_SEQS_MASK] = sequence;
1760*2aa14b1aSNick Terrell                 op += oneSeqSize;
1761*2aa14b1aSNick Terrell             }
1762*2aa14b1aSNick Terrell         }
1763e0c1b49fSNick Terrell         RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
1764e0c1b49fSNick Terrell 
1765e0c1b49fSNick Terrell         /* finish queue */
1766e0c1b49fSNick Terrell         seqNb -= seqAdvance;
1767e0c1b49fSNick Terrell         for ( ; seqNb<nbSeq ; seqNb++) {
1768*2aa14b1aSNick Terrell             seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1769*2aa14b1aSNick Terrell             if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
1770*2aa14b1aSNick Terrell             {
1771*2aa14b1aSNick Terrell                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1772*2aa14b1aSNick Terrell                 if (leftoverLit)
1773*2aa14b1aSNick Terrell                 {
1774*2aa14b1aSNick Terrell                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1775*2aa14b1aSNick Terrell                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1776*2aa14b1aSNick Terrell                     sequence->litLength -= leftoverLit;
1777*2aa14b1aSNick Terrell                     op += leftoverLit;
1778*2aa14b1aSNick Terrell                 }
1779*2aa14b1aSNick Terrell                 litPtr = dctx->litExtraBuffer;
1780*2aa14b1aSNick Terrell                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1781*2aa14b1aSNick Terrell                 dctx->litBufferLocation = ZSTD_not_in_dst;
1782*2aa14b1aSNick Terrell                 {
1783*2aa14b1aSNick Terrell                     size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1784e0c1b49fSNick Terrell #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1785e0c1b49fSNick Terrell                     assert(!ZSTD_isError(oneSeqSize));
1786e0c1b49fSNick Terrell                     if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1787e0c1b49fSNick Terrell #endif
1788e0c1b49fSNick Terrell                     if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1789e0c1b49fSNick Terrell                     op += oneSeqSize;
1790e0c1b49fSNick Terrell                 }
1791*2aa14b1aSNick Terrell             }
1792*2aa14b1aSNick Terrell             else
1793*2aa14b1aSNick Terrell             {
1794*2aa14b1aSNick Terrell                 size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1795*2aa14b1aSNick Terrell                     ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1796*2aa14b1aSNick Terrell                     ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1797*2aa14b1aSNick Terrell #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1798*2aa14b1aSNick Terrell                 assert(!ZSTD_isError(oneSeqSize));
1799*2aa14b1aSNick Terrell                 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1800*2aa14b1aSNick Terrell #endif
1801*2aa14b1aSNick Terrell                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1802*2aa14b1aSNick Terrell                 op += oneSeqSize;
1803*2aa14b1aSNick Terrell             }
1804*2aa14b1aSNick Terrell         }
1805e0c1b49fSNick Terrell 
1806e0c1b49fSNick Terrell         /* save reps for next block */
1807e0c1b49fSNick Terrell         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1808e0c1b49fSNick Terrell     }
1809e0c1b49fSNick Terrell 
1810e0c1b49fSNick Terrell     /* last literal segment */
1811*2aa14b1aSNick Terrell     if (dctx->litBufferLocation == ZSTD_split)  /* first deplete literal buffer in dst, then copy litExtraBuffer */
1812*2aa14b1aSNick Terrell     {
1813*2aa14b1aSNick Terrell         size_t const lastLLSize = litBufferEnd - litPtr;
1814e0c1b49fSNick Terrell         RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1815e0c1b49fSNick Terrell         if (op != NULL) {
1816*2aa14b1aSNick Terrell             ZSTD_memmove(op, litPtr, lastLLSize);
1817*2aa14b1aSNick Terrell             op += lastLLSize;
1818*2aa14b1aSNick Terrell         }
1819*2aa14b1aSNick Terrell         litPtr = dctx->litExtraBuffer;
1820*2aa14b1aSNick Terrell         litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1821*2aa14b1aSNick Terrell     }
1822*2aa14b1aSNick Terrell     {   size_t const lastLLSize = litBufferEnd - litPtr;
1823*2aa14b1aSNick Terrell         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1824*2aa14b1aSNick Terrell         if (op != NULL) {
1825*2aa14b1aSNick Terrell             ZSTD_memmove(op, litPtr, lastLLSize);
1826e0c1b49fSNick Terrell             op += lastLLSize;
1827e0c1b49fSNick Terrell         }
1828e0c1b49fSNick Terrell     }
1829e0c1b49fSNick Terrell 
1830e0c1b49fSNick Terrell     return op-ostart;
1831e0c1b49fSNick Terrell }
1832e0c1b49fSNick Terrell 
1833e0c1b49fSNick Terrell static size_t
1834e0c1b49fSNick Terrell ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1835e0c1b49fSNick Terrell                                  void* dst, size_t maxDstSize,
1836e0c1b49fSNick Terrell                            const void* seqStart, size_t seqSize, int nbSeq,
1837e0c1b49fSNick Terrell                            const ZSTD_longOffset_e isLongOffset,
1838e0c1b49fSNick Terrell                            const int frame)
1839e0c1b49fSNick Terrell {
1840e0c1b49fSNick Terrell     return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1841e0c1b49fSNick Terrell }
1842e0c1b49fSNick Terrell #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1843e0c1b49fSNick Terrell 
1844e0c1b49fSNick Terrell 
1845e0c1b49fSNick Terrell 
1846e0c1b49fSNick Terrell #if DYNAMIC_BMI2
1847e0c1b49fSNick Terrell 
1848e0c1b49fSNick Terrell #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1849*2aa14b1aSNick Terrell static BMI2_TARGET_ATTRIBUTE size_t
1850e0c1b49fSNick Terrell DONT_VECTORIZE
1851e0c1b49fSNick Terrell ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1852e0c1b49fSNick Terrell                                  void* dst, size_t maxDstSize,
1853e0c1b49fSNick Terrell                            const void* seqStart, size_t seqSize, int nbSeq,
1854e0c1b49fSNick Terrell                            const ZSTD_longOffset_e isLongOffset,
1855e0c1b49fSNick Terrell                            const int frame)
1856e0c1b49fSNick Terrell {
1857e0c1b49fSNick Terrell     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1858e0c1b49fSNick Terrell }
1859*2aa14b1aSNick Terrell static BMI2_TARGET_ATTRIBUTE size_t
1860*2aa14b1aSNick Terrell DONT_VECTORIZE
1861*2aa14b1aSNick Terrell ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1862*2aa14b1aSNick Terrell                                  void* dst, size_t maxDstSize,
1863*2aa14b1aSNick Terrell                            const void* seqStart, size_t seqSize, int nbSeq,
1864*2aa14b1aSNick Terrell                            const ZSTD_longOffset_e isLongOffset,
1865*2aa14b1aSNick Terrell                            const int frame)
1866*2aa14b1aSNick Terrell {
1867*2aa14b1aSNick Terrell     return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1868*2aa14b1aSNick Terrell }
1869e0c1b49fSNick Terrell #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1870e0c1b49fSNick Terrell 
1871e0c1b49fSNick Terrell #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1872*2aa14b1aSNick Terrell static BMI2_TARGET_ATTRIBUTE size_t
1873e0c1b49fSNick Terrell ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1874e0c1b49fSNick Terrell                                  void* dst, size_t maxDstSize,
1875e0c1b49fSNick Terrell                            const void* seqStart, size_t seqSize, int nbSeq,
1876e0c1b49fSNick Terrell                            const ZSTD_longOffset_e isLongOffset,
1877e0c1b49fSNick Terrell                            const int frame)
1878e0c1b49fSNick Terrell {
1879e0c1b49fSNick Terrell     return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1880e0c1b49fSNick Terrell }
1881e0c1b49fSNick Terrell #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1882e0c1b49fSNick Terrell 
1883e0c1b49fSNick Terrell #endif /* DYNAMIC_BMI2 */
1884e0c1b49fSNick Terrell 
1885e0c1b49fSNick Terrell typedef size_t (*ZSTD_decompressSequences_t)(
1886e0c1b49fSNick Terrell                             ZSTD_DCtx* dctx,
1887e0c1b49fSNick Terrell                             void* dst, size_t maxDstSize,
1888e0c1b49fSNick Terrell                             const void* seqStart, size_t seqSize, int nbSeq,
1889e0c1b49fSNick Terrell                             const ZSTD_longOffset_e isLongOffset,
1890e0c1b49fSNick Terrell                             const int frame);
1891e0c1b49fSNick Terrell 
1892e0c1b49fSNick Terrell #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1893e0c1b49fSNick Terrell static size_t
1894e0c1b49fSNick Terrell ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1895e0c1b49fSNick Terrell                    const void* seqStart, size_t seqSize, int nbSeq,
1896e0c1b49fSNick Terrell                    const ZSTD_longOffset_e isLongOffset,
1897e0c1b49fSNick Terrell                    const int frame)
1898e0c1b49fSNick Terrell {
1899e0c1b49fSNick Terrell     DEBUGLOG(5, "ZSTD_decompressSequences");
1900e0c1b49fSNick Terrell #if DYNAMIC_BMI2
1901*2aa14b1aSNick Terrell     if (ZSTD_DCtx_get_bmi2(dctx)) {
1902e0c1b49fSNick Terrell         return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1903e0c1b49fSNick Terrell     }
1904e0c1b49fSNick Terrell #endif
1905e0c1b49fSNick Terrell     return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1906e0c1b49fSNick Terrell }
1907*2aa14b1aSNick Terrell static size_t
1908*2aa14b1aSNick Terrell ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1909*2aa14b1aSNick Terrell                                  const void* seqStart, size_t seqSize, int nbSeq,
1910*2aa14b1aSNick Terrell                                  const ZSTD_longOffset_e isLongOffset,
1911*2aa14b1aSNick Terrell                                  const int frame)
1912*2aa14b1aSNick Terrell {
1913*2aa14b1aSNick Terrell     DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
1914*2aa14b1aSNick Terrell #if DYNAMIC_BMI2
1915*2aa14b1aSNick Terrell     if (ZSTD_DCtx_get_bmi2(dctx)) {
1916*2aa14b1aSNick Terrell         return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1917*2aa14b1aSNick Terrell     }
1918*2aa14b1aSNick Terrell #endif
1919*2aa14b1aSNick Terrell     return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1920*2aa14b1aSNick Terrell }
1921e0c1b49fSNick Terrell #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1922e0c1b49fSNick Terrell 
1923e0c1b49fSNick Terrell 
1924e0c1b49fSNick Terrell #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1925e0c1b49fSNick Terrell /* ZSTD_decompressSequencesLong() :
1926e0c1b49fSNick Terrell  * decompression function triggered when a minimum share of offsets is considered "long",
1927e0c1b49fSNick Terrell  * aka out of cache.
1928e0c1b49fSNick Terrell  * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1929e0c1b49fSNick Terrell  * This function will try to mitigate main memory latency through the use of prefetching */
1930e0c1b49fSNick Terrell static size_t
1931e0c1b49fSNick Terrell ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1932e0c1b49fSNick Terrell                              void* dst, size_t maxDstSize,
1933e0c1b49fSNick Terrell                              const void* seqStart, size_t seqSize, int nbSeq,
1934e0c1b49fSNick Terrell                              const ZSTD_longOffset_e isLongOffset,
1935e0c1b49fSNick Terrell                              const int frame)
1936e0c1b49fSNick Terrell {
1937e0c1b49fSNick Terrell     DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1938e0c1b49fSNick Terrell #if DYNAMIC_BMI2
1939*2aa14b1aSNick Terrell     if (ZSTD_DCtx_get_bmi2(dctx)) {
1940e0c1b49fSNick Terrell         return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1941e0c1b49fSNick Terrell     }
1942e0c1b49fSNick Terrell #endif
1943e0c1b49fSNick Terrell   return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1944e0c1b49fSNick Terrell }
1945e0c1b49fSNick Terrell #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1946e0c1b49fSNick Terrell 
1947e0c1b49fSNick Terrell 
1948e0c1b49fSNick Terrell 
1949e0c1b49fSNick Terrell #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1950e0c1b49fSNick Terrell     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1951e0c1b49fSNick Terrell /* ZSTD_getLongOffsetsShare() :
1952e0c1b49fSNick Terrell  * condition : offTable must be valid
1953e0c1b49fSNick Terrell  * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1954e0c1b49fSNick Terrell  *           compared to maximum possible of (1<<OffFSELog) */
1955e0c1b49fSNick Terrell static unsigned
1956e0c1b49fSNick Terrell ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1957e0c1b49fSNick Terrell {
1958e0c1b49fSNick Terrell     const void* ptr = offTable;
1959e0c1b49fSNick Terrell     U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1960e0c1b49fSNick Terrell     const ZSTD_seqSymbol* table = offTable + 1;
1961e0c1b49fSNick Terrell     U32 const max = 1 << tableLog;
1962e0c1b49fSNick Terrell     U32 u, total = 0;
1963e0c1b49fSNick Terrell     DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1964e0c1b49fSNick Terrell 
1965e0c1b49fSNick Terrell     assert(max <= (1 << OffFSELog));  /* max not too large */
1966e0c1b49fSNick Terrell     for (u=0; u<max; u++) {
1967e0c1b49fSNick Terrell         if (table[u].nbAdditionalBits > 22) total += 1;
1968e0c1b49fSNick Terrell     }
1969e0c1b49fSNick Terrell 
1970e0c1b49fSNick Terrell     assert(tableLog <= OffFSELog);
1971e0c1b49fSNick Terrell     total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
1972e0c1b49fSNick Terrell 
1973e0c1b49fSNick Terrell     return total;
1974e0c1b49fSNick Terrell }
1975e0c1b49fSNick Terrell #endif
1976e0c1b49fSNick Terrell 
1977e0c1b49fSNick Terrell size_t
1978e0c1b49fSNick Terrell ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1979e0c1b49fSNick Terrell                               void* dst, size_t dstCapacity,
1980*2aa14b1aSNick Terrell                         const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
1981e0c1b49fSNick Terrell {   /* blockType == blockCompressed */
1982e0c1b49fSNick Terrell     const BYTE* ip = (const BYTE*)src;
1983e0c1b49fSNick Terrell     /* isLongOffset must be true if there are long offsets.
1984e0c1b49fSNick Terrell      * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1985e0c1b49fSNick Terrell      * We don't expect that to be the case in 64-bit mode.
1986e0c1b49fSNick Terrell      * In block mode, window size is not known, so we have to be conservative.
1987e0c1b49fSNick Terrell      * (note: but it could be evaluated from current-lowLimit)
1988e0c1b49fSNick Terrell      */
1989e0c1b49fSNick Terrell     ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1990e0c1b49fSNick Terrell     DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1991e0c1b49fSNick Terrell 
1992e0c1b49fSNick Terrell     RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
1993e0c1b49fSNick Terrell 
1994e0c1b49fSNick Terrell     /* Decode literals section */
1995*2aa14b1aSNick Terrell     {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
1996e0c1b49fSNick Terrell         DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
1997e0c1b49fSNick Terrell         if (ZSTD_isError(litCSize)) return litCSize;
1998e0c1b49fSNick Terrell         ip += litCSize;
1999e0c1b49fSNick Terrell         srcSize -= litCSize;
2000e0c1b49fSNick Terrell     }
2001e0c1b49fSNick Terrell 
2002e0c1b49fSNick Terrell     /* Build Decoding Tables */
2003e0c1b49fSNick Terrell     {
2004e0c1b49fSNick Terrell         /* These macros control at build-time which decompressor implementation
2005e0c1b49fSNick Terrell          * we use. If neither is defined, we do some inspection and dispatch at
2006e0c1b49fSNick Terrell          * runtime.
2007e0c1b49fSNick Terrell          */
2008e0c1b49fSNick Terrell #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2009e0c1b49fSNick Terrell     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2010e0c1b49fSNick Terrell         int usePrefetchDecoder = dctx->ddictIsCold;
2011e0c1b49fSNick Terrell #endif
2012e0c1b49fSNick Terrell         int nbSeq;
2013e0c1b49fSNick Terrell         size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
2014e0c1b49fSNick Terrell         if (ZSTD_isError(seqHSize)) return seqHSize;
2015e0c1b49fSNick Terrell         ip += seqHSize;
2016e0c1b49fSNick Terrell         srcSize -= seqHSize;
2017e0c1b49fSNick Terrell 
2018e0c1b49fSNick Terrell         RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2019e0c1b49fSNick Terrell 
2020e0c1b49fSNick Terrell #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2021e0c1b49fSNick Terrell     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2022e0c1b49fSNick Terrell         if ( !usePrefetchDecoder
2023e0c1b49fSNick Terrell           && (!frame || (dctx->fParams.windowSize > (1<<24)))
2024e0c1b49fSNick Terrell           && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
2025e0c1b49fSNick Terrell             U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
2026e0c1b49fSNick Terrell             U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2027e0c1b49fSNick Terrell             usePrefetchDecoder = (shareLongOffsets >= minShare);
2028e0c1b49fSNick Terrell         }
2029e0c1b49fSNick Terrell #endif
2030e0c1b49fSNick Terrell 
2031e0c1b49fSNick Terrell         dctx->ddictIsCold = 0;
2032e0c1b49fSNick Terrell 
2033e0c1b49fSNick Terrell #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2034e0c1b49fSNick Terrell     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2035e0c1b49fSNick Terrell         if (usePrefetchDecoder)
2036e0c1b49fSNick Terrell #endif
2037e0c1b49fSNick Terrell #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
2038e0c1b49fSNick Terrell             return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2039e0c1b49fSNick Terrell #endif
2040e0c1b49fSNick Terrell 
2041e0c1b49fSNick Terrell #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2042e0c1b49fSNick Terrell         /* else */
2043*2aa14b1aSNick Terrell         if (dctx->litBufferLocation == ZSTD_split)
2044*2aa14b1aSNick Terrell             return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2045*2aa14b1aSNick Terrell         else
2046e0c1b49fSNick Terrell             return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2047e0c1b49fSNick Terrell #endif
2048e0c1b49fSNick Terrell     }
2049e0c1b49fSNick Terrell }
2050e0c1b49fSNick Terrell 
2051e0c1b49fSNick Terrell 
2052e0c1b49fSNick Terrell void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
2053e0c1b49fSNick Terrell {
2054e0c1b49fSNick Terrell     if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
2055e0c1b49fSNick Terrell         dctx->dictEnd = dctx->previousDstEnd;
2056e0c1b49fSNick Terrell         dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
2057e0c1b49fSNick Terrell         dctx->prefixStart = dst;
2058e0c1b49fSNick Terrell         dctx->previousDstEnd = dst;
2059e0c1b49fSNick Terrell     }
2060e0c1b49fSNick Terrell }
2061e0c1b49fSNick Terrell 
2062e0c1b49fSNick Terrell 
2063e0c1b49fSNick Terrell size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2064e0c1b49fSNick Terrell                             void* dst, size_t dstCapacity,
2065e0c1b49fSNick Terrell                       const void* src, size_t srcSize)
2066e0c1b49fSNick Terrell {
2067e0c1b49fSNick Terrell     size_t dSize;
2068e0c1b49fSNick Terrell     ZSTD_checkContinuity(dctx, dst, dstCapacity);
2069*2aa14b1aSNick Terrell     dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
2070e0c1b49fSNick Terrell     dctx->previousDstEnd = (char*)dst + dSize;
2071e0c1b49fSNick Terrell     return dSize;
2072e0c1b49fSNick Terrell }
2073