xref: /openbmc/linux/lib/zstd/decompress/zstd_ddict.c (revision 03ab8e6297acd1bc0eedaa050e2a1635c576fd11)
/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
10*e0c1b49fSNick Terrell 
/* zstd_ddict.c :
 * concentrates all the logic that needs to know the internals of the ZSTD_DDict object */
13*e0c1b49fSNick Terrell 
/*-*******************************************************
*  Dependencies
*********************************************************/
17*e0c1b49fSNick Terrell #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18*e0c1b49fSNick Terrell #include "../common/cpu.h"         /* bmi2 */
19*e0c1b49fSNick Terrell #include "../common/mem.h"         /* low level memory routines */
20*e0c1b49fSNick Terrell #define FSE_STATIC_LINKING_ONLY
21*e0c1b49fSNick Terrell #include "../common/fse.h"
22*e0c1b49fSNick Terrell #define HUF_STATIC_LINKING_ONLY
23*e0c1b49fSNick Terrell #include "../common/huf.h"
24*e0c1b49fSNick Terrell #include "zstd_decompress_internal.h"
25*e0c1b49fSNick Terrell #include "zstd_ddict.h"
26*e0c1b49fSNick Terrell 
27*e0c1b49fSNick Terrell 
28*e0c1b49fSNick Terrell 
29*e0c1b49fSNick Terrell 
/*-*******************************************************
*  Types
*********************************************************/
33*e0c1b49fSNick Terrell struct ZSTD_DDict_s {
34*e0c1b49fSNick Terrell     void* dictBuffer;
35*e0c1b49fSNick Terrell     const void* dictContent;
36*e0c1b49fSNick Terrell     size_t dictSize;
37*e0c1b49fSNick Terrell     ZSTD_entropyDTables_t entropy;
38*e0c1b49fSNick Terrell     U32 dictID;
39*e0c1b49fSNick Terrell     U32 entropyPresent;
40*e0c1b49fSNick Terrell     ZSTD_customMem cMem;
41*e0c1b49fSNick Terrell };  /* typedef'd to ZSTD_DDict within "zstd.h" */
42*e0c1b49fSNick Terrell 
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)43*e0c1b49fSNick Terrell const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
44*e0c1b49fSNick Terrell {
45*e0c1b49fSNick Terrell     assert(ddict != NULL);
46*e0c1b49fSNick Terrell     return ddict->dictContent;
47*e0c1b49fSNick Terrell }
48*e0c1b49fSNick Terrell 
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)49*e0c1b49fSNick Terrell size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
50*e0c1b49fSNick Terrell {
51*e0c1b49fSNick Terrell     assert(ddict != NULL);
52*e0c1b49fSNick Terrell     return ddict->dictSize;
53*e0c1b49fSNick Terrell }
54*e0c1b49fSNick Terrell 
/* ZSTD_copyDDictParameters() :
 * Points `dctx` at the dictionary held by `ddict` :
 * dictionary ID, content boundaries and, when the dictionary carries them,
 * the entropy tables and starting repcodes.
 * Neither `dctx` nor `ddict` may be NULL. */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* dictionary identity and boundaries */
    {   const void* const contentStart = ddict->dictContent;
        dctx->dictID = ddict->dictID;
        dctx->prefixStart = contentStart;
        dctx->virtualStart = contentStart;
        dctx->dictEnd = (const BYTE*)contentStart + ddict->dictSize;
        dctx->previousDstEnd = dctx->dictEnd;
    }
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif

    /* content-only dictionary : no entropy state to share */
    if (!ddict->entropyPresent) {
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    /* the tables are referenced in place, only the repcodes are copied */
    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->LLTptr = ddict->entropy.LLTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->HUFptr = ddict->entropy.hufTable;
    {   int r;
        for (r = 0; r < 3; r++) {   /* the three repeat offsets */
            dctx->entropy.rep[r] = ddict->entropy.rep[r];
        }
    }
}
84*e0c1b49fSNick Terrell 
85*e0c1b49fSNick Terrell 
86*e0c1b49fSNick Terrell static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)87*e0c1b49fSNick Terrell ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
88*e0c1b49fSNick Terrell                            ZSTD_dictContentType_e dictContentType)
89*e0c1b49fSNick Terrell {
90*e0c1b49fSNick Terrell     ddict->dictID = 0;
91*e0c1b49fSNick Terrell     ddict->entropyPresent = 0;
92*e0c1b49fSNick Terrell     if (dictContentType == ZSTD_dct_rawContent) return 0;
93*e0c1b49fSNick Terrell 
94*e0c1b49fSNick Terrell     if (ddict->dictSize < 8) {
95*e0c1b49fSNick Terrell         if (dictContentType == ZSTD_dct_fullDict)
96*e0c1b49fSNick Terrell             return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
97*e0c1b49fSNick Terrell         return 0;   /* pure content mode */
98*e0c1b49fSNick Terrell     }
99*e0c1b49fSNick Terrell     {   U32 const magic = MEM_readLE32(ddict->dictContent);
100*e0c1b49fSNick Terrell         if (magic != ZSTD_MAGIC_DICTIONARY) {
101*e0c1b49fSNick Terrell             if (dictContentType == ZSTD_dct_fullDict)
102*e0c1b49fSNick Terrell                 return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
103*e0c1b49fSNick Terrell             return 0;   /* pure content mode */
104*e0c1b49fSNick Terrell         }
105*e0c1b49fSNick Terrell     }
106*e0c1b49fSNick Terrell     ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
107*e0c1b49fSNick Terrell 
108*e0c1b49fSNick Terrell     /* load entropy tables */
109*e0c1b49fSNick Terrell     RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
110*e0c1b49fSNick Terrell             &ddict->entropy, ddict->dictContent, ddict->dictSize)),
111*e0c1b49fSNick Terrell         dictionary_corrupted, "");
112*e0c1b49fSNick Terrell     ddict->entropyPresent = 1;
113*e0c1b49fSNick Terrell     return 0;
114*e0c1b49fSNick Terrell }
115*e0c1b49fSNick Terrell 
116*e0c1b49fSNick Terrell 
/* ZSTD_initDDict_internal() :
 * Attaches the dictionary content to `ddict`, then parses it.
 * The content is duplicated with `ddict->cMem` only when a copy is requested
 * and there is something to copy; otherwise `dict` is referenced as is.
 * `ddict->cMem` must be set beforehand whenever a copy can happen.
 * @return : 0 on success, or an error code
 *           (memory_allocation, or whatever the dictionary parser reports). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const needsCopy = (dictLoadMethod != ZSTD_dlm_byRef) && (dict != NULL) && (dictSize != 0);

    if (needsCopy) {
        void* const ownedCopy = ZSTD_customMalloc(dictSize, ddict->cMem);
        /* both fields are stored before the failure test,
         * so the object is safe to free even when the allocation failed */
        ddict->dictBuffer = ownedCopy;
        ddict->dictContent = ownedCopy;
        if (ownedCopy == NULL) return ERROR(memory_allocation);
        ZSTD_memcpy(ownedCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;
    }
    ddict->dictSize = dictSize;
    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */

    /* parse dictionary content */
    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");

    return 0;
}
141*e0c1b49fSNick Terrell 
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)142*e0c1b49fSNick Terrell ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
143*e0c1b49fSNick Terrell                                       ZSTD_dictLoadMethod_e dictLoadMethod,
144*e0c1b49fSNick Terrell                                       ZSTD_dictContentType_e dictContentType,
145*e0c1b49fSNick Terrell                                       ZSTD_customMem customMem)
146*e0c1b49fSNick Terrell {
147*e0c1b49fSNick Terrell     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
148*e0c1b49fSNick Terrell 
149*e0c1b49fSNick Terrell     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
150*e0c1b49fSNick Terrell         if (ddict == NULL) return NULL;
151*e0c1b49fSNick Terrell         ddict->cMem = customMem;
152*e0c1b49fSNick Terrell         {   size_t const initResult = ZSTD_initDDict_internal(ddict,
153*e0c1b49fSNick Terrell                                             dict, dictSize,
154*e0c1b49fSNick Terrell                                             dictLoadMethod, dictContentType);
155*e0c1b49fSNick Terrell             if (ZSTD_isError(initResult)) {
156*e0c1b49fSNick Terrell                 ZSTD_freeDDict(ddict);
157*e0c1b49fSNick Terrell                 return NULL;
158*e0c1b49fSNick Terrell         }   }
159*e0c1b49fSNick Terrell         return ddict;
160*e0c1b49fSNick Terrell     }
161*e0c1b49fSNick Terrell }
162*e0c1b49fSNick Terrell 
163*e0c1b49fSNick Terrell /*! ZSTD_createDDict() :
164*e0c1b49fSNick Terrell *   Create a digested dictionary, to start decompression without startup delay.
165*e0c1b49fSNick Terrell *   `dict` content is copied inside DDict.
166*e0c1b49fSNick Terrell *   Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)167*e0c1b49fSNick Terrell ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
168*e0c1b49fSNick Terrell {
169*e0c1b49fSNick Terrell     ZSTD_customMem const allocator = { NULL, NULL, NULL };
170*e0c1b49fSNick Terrell     return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
171*e0c1b49fSNick Terrell }
172*e0c1b49fSNick Terrell 
173*e0c1b49fSNick Terrell /*! ZSTD_createDDict_byReference() :
174*e0c1b49fSNick Terrell  *  Create a digested dictionary, to start decompression without startup delay.
175*e0c1b49fSNick Terrell  *  Dictionary content is simply referenced, it will be accessed during decompression.
176*e0c1b49fSNick Terrell  *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)177*e0c1b49fSNick Terrell ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
178*e0c1b49fSNick Terrell {
179*e0c1b49fSNick Terrell     ZSTD_customMem const allocator = { NULL, NULL, NULL };
180*e0c1b49fSNick Terrell     return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
181*e0c1b49fSNick Terrell }
182*e0c1b49fSNick Terrell 
183*e0c1b49fSNick Terrell 
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)184*e0c1b49fSNick Terrell const ZSTD_DDict* ZSTD_initStaticDDict(
185*e0c1b49fSNick Terrell                                 void* sBuffer, size_t sBufferSize,
186*e0c1b49fSNick Terrell                                 const void* dict, size_t dictSize,
187*e0c1b49fSNick Terrell                                 ZSTD_dictLoadMethod_e dictLoadMethod,
188*e0c1b49fSNick Terrell                                 ZSTD_dictContentType_e dictContentType)
189*e0c1b49fSNick Terrell {
190*e0c1b49fSNick Terrell     size_t const neededSpace = sizeof(ZSTD_DDict)
191*e0c1b49fSNick Terrell                              + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
192*e0c1b49fSNick Terrell     ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
193*e0c1b49fSNick Terrell     assert(sBuffer != NULL);
194*e0c1b49fSNick Terrell     assert(dict != NULL);
195*e0c1b49fSNick Terrell     if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
196*e0c1b49fSNick Terrell     if (sBufferSize < neededSpace) return NULL;
197*e0c1b49fSNick Terrell     if (dictLoadMethod == ZSTD_dlm_byCopy) {
198*e0c1b49fSNick Terrell         ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
199*e0c1b49fSNick Terrell         dict = ddict+1;
200*e0c1b49fSNick Terrell     }
201*e0c1b49fSNick Terrell     if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
202*e0c1b49fSNick Terrell                                               dict, dictSize,
203*e0c1b49fSNick Terrell                                               ZSTD_dlm_byRef, dictContentType) ))
204*e0c1b49fSNick Terrell         return NULL;
205*e0c1b49fSNick Terrell     return ddict;
206*e0c1b49fSNick Terrell }
207*e0c1b49fSNick Terrell 
208*e0c1b49fSNick Terrell 
/*! ZSTD_freeDDict() :
 *  Releases a DDict together with its internal copy of the dictionary, if any.
 *  Accepts NULL, in which case nothing happens.
 * @return : always 0 */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    if (ddict != NULL) {
        /* the allocator is stored inside the object being released : copy it out first */
        ZSTD_customMem const allocator = ddict->cMem;
        ZSTD_customFree(ddict->dictBuffer, allocator);
        ZSTD_customFree(ddict, allocator);
    }
    return 0;
}
218*e0c1b49fSNick Terrell 
219*e0c1b49fSNick Terrell /*! ZSTD_estimateDDictSize() :
220*e0c1b49fSNick Terrell  *  Estimate amount of memory that will be needed to create a dictionary for decompression.
221*e0c1b49fSNick Terrell  *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)222*e0c1b49fSNick Terrell size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
223*e0c1b49fSNick Terrell {
224*e0c1b49fSNick Terrell     return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
225*e0c1b49fSNick Terrell }
226*e0c1b49fSNick Terrell 
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)227*e0c1b49fSNick Terrell size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
228*e0c1b49fSNick Terrell {
229*e0c1b49fSNick Terrell     if (ddict==NULL) return 0;   /* support sizeof on NULL */
230*e0c1b49fSNick Terrell     return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
231*e0c1b49fSNick Terrell }
232*e0c1b49fSNick Terrell 
233*e0c1b49fSNick Terrell /*! ZSTD_getDictID_fromDDict() :
234*e0c1b49fSNick Terrell  *  Provides the dictID of the dictionary loaded into `ddict`.
235*e0c1b49fSNick Terrell  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
236*e0c1b49fSNick Terrell  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)237*e0c1b49fSNick Terrell unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
238*e0c1b49fSNick Terrell {
239*e0c1b49fSNick Terrell     if (ddict==NULL) return 0;
240*e0c1b49fSNick Terrell     return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
241*e0c1b49fSNick Terrell }
242