1*e0c1b49fSNick Terrell /* 2*e0c1b49fSNick Terrell * Copyright (c) Yann Collet, Facebook, Inc. 3*e0c1b49fSNick Terrell * All rights reserved. 4*e0c1b49fSNick Terrell * 5*e0c1b49fSNick Terrell * This source code is licensed under both the BSD-style license (found in the 6*e0c1b49fSNick Terrell * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*e0c1b49fSNick Terrell * in the COPYING file in the root directory of this source tree). 8*e0c1b49fSNick Terrell * You may select, at your option, one of the above-listed licenses. 9*e0c1b49fSNick Terrell */ 10*e0c1b49fSNick Terrell 11*e0c1b49fSNick Terrell /* zstd_ddict.c : 12*e0c1b49fSNick Terrell * concentrates all logic that needs to know the internals of ZSTD_DDict object */ 13*e0c1b49fSNick Terrell 14*e0c1b49fSNick Terrell /*-******************************************************* 15*e0c1b49fSNick Terrell * Dependencies 16*e0c1b49fSNick Terrell *********************************************************/ 17*e0c1b49fSNick Terrell #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ 18*e0c1b49fSNick Terrell #include "../common/cpu.h" /* bmi2 */ 19*e0c1b49fSNick Terrell #include "../common/mem.h" /* low level memory routines */ 20*e0c1b49fSNick Terrell #define FSE_STATIC_LINKING_ONLY 21*e0c1b49fSNick Terrell #include "../common/fse.h" 22*e0c1b49fSNick Terrell #define HUF_STATIC_LINKING_ONLY 23*e0c1b49fSNick Terrell #include "../common/huf.h" 24*e0c1b49fSNick Terrell #include "zstd_decompress_internal.h" 25*e0c1b49fSNick Terrell #include "zstd_ddict.h" 26*e0c1b49fSNick Terrell 27*e0c1b49fSNick Terrell 28*e0c1b49fSNick Terrell 29*e0c1b49fSNick Terrell 30*e0c1b49fSNick Terrell /*-******************************************************* 31*e0c1b49fSNick Terrell * Types 32*e0c1b49fSNick Terrell *********************************************************/ 33*e0c1b49fSNick Terrell struct ZSTD_DDict_s { 34*e0c1b49fSNick Terrell void* dictBuffer; 35*e0c1b49fSNick Terrell const void* dictContent; 36*e0c1b49fSNick Terrell size_t dictSize; 37*e0c1b49fSNick Terrell ZSTD_entropyDTables_t entropy; 38*e0c1b49fSNick Terrell U32 dictID; 39*e0c1b49fSNick Terrell U32 entropyPresent; 40*e0c1b49fSNick Terrell ZSTD_customMem cMem; 41*e0c1b49fSNick Terrell }; /* typedef'd to ZSTD_DDict within "zstd.h" */ 42*e0c1b49fSNick Terrell 43*e0c1b49fSNick Terrell const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) 44*e0c1b49fSNick Terrell { 45*e0c1b49fSNick Terrell assert(ddict != NULL); 46*e0c1b49fSNick Terrell return ddict->dictContent; 47*e0c1b49fSNick Terrell } 48*e0c1b49fSNick Terrell 49*e0c1b49fSNick Terrell size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) 50*e0c1b49fSNick Terrell { 51*e0c1b49fSNick Terrell assert(ddict != NULL); 52*e0c1b49fSNick Terrell return ddict->dictSize; 53*e0c1b49fSNick Terrell } 54*e0c1b49fSNick Terrell 55*e0c1b49fSNick Terrell void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) 56*e0c1b49fSNick Terrell { 57*e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_copyDDictParameters"); 58*e0c1b49fSNick Terrell assert(dctx != NULL); 59*e0c1b49fSNick Terrell assert(ddict != NULL); 60*e0c1b49fSNick Terrell dctx->dictID = ddict->dictID; 61*e0c1b49fSNick Terrell dctx->prefixStart = ddict->dictContent; 62*e0c1b49fSNick Terrell dctx->virtualStart = ddict->dictContent; 63*e0c1b49fSNick Terrell dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; 64*e0c1b49fSNick Terrell dctx->previousDstEnd = dctx->dictEnd; 65*e0c1b49fSNick Terrell #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 66*e0c1b49fSNick Terrell dctx->dictContentBeginForFuzzing = dctx->prefixStart; 67*e0c1b49fSNick Terrell dctx->dictContentEndForFuzzing = dctx->previousDstEnd; 68*e0c1b49fSNick Terrell #endif 69*e0c1b49fSNick Terrell if (ddict->entropyPresent) { 70*e0c1b49fSNick Terrell dctx->litEntropy = 1; 71*e0c1b49fSNick Terrell dctx->fseEntropy = 1; 72*e0c1b49fSNick Terrell dctx->LLTptr = ddict->entropy.LLTable; 73*e0c1b49fSNick Terrell dctx->MLTptr = ddict->entropy.MLTable; 74*e0c1b49fSNick Terrell dctx->OFTptr = ddict->entropy.OFTable; 75*e0c1b49fSNick Terrell dctx->HUFptr = ddict->entropy.hufTable; 76*e0c1b49fSNick Terrell dctx->entropy.rep[0] = ddict->entropy.rep[0]; 77*e0c1b49fSNick Terrell dctx->entropy.rep[1] = ddict->entropy.rep[1]; 78*e0c1b49fSNick Terrell dctx->entropy.rep[2] = ddict->entropy.rep[2]; 79*e0c1b49fSNick Terrell } else { 80*e0c1b49fSNick Terrell dctx->litEntropy = 0; 81*e0c1b49fSNick Terrell dctx->fseEntropy = 0; 82*e0c1b49fSNick Terrell } 83*e0c1b49fSNick Terrell } 84*e0c1b49fSNick Terrell 85*e0c1b49fSNick Terrell 86*e0c1b49fSNick Terrell static size_t 87*e0c1b49fSNick Terrell ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, 88*e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType) 89*e0c1b49fSNick Terrell { 90*e0c1b49fSNick Terrell ddict->dictID = 0; 91*e0c1b49fSNick Terrell ddict->entropyPresent = 0; 92*e0c1b49fSNick Terrell if (dictContentType == ZSTD_dct_rawContent) return 0; 93*e0c1b49fSNick Terrell 94*e0c1b49fSNick Terrell if (ddict->dictSize < 8) { 95*e0c1b49fSNick Terrell if (dictContentType == ZSTD_dct_fullDict) 96*e0c1b49fSNick Terrell return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ 97*e0c1b49fSNick Terrell return 0; /* pure content mode */ 98*e0c1b49fSNick Terrell } 99*e0c1b49fSNick Terrell { U32 const magic = MEM_readLE32(ddict->dictContent); 100*e0c1b49fSNick Terrell if (magic != ZSTD_MAGIC_DICTIONARY) { 101*e0c1b49fSNick Terrell if (dictContentType == ZSTD_dct_fullDict) 102*e0c1b49fSNick Terrell return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ 103*e0c1b49fSNick Terrell return 0; /* pure content mode */ 104*e0c1b49fSNick Terrell } 105*e0c1b49fSNick Terrell } 106*e0c1b49fSNick Terrell ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); 107*e0c1b49fSNick Terrell 108*e0c1b49fSNick Terrell /* load entropy tables */ 109*e0c1b49fSNick Terrell RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( 110*e0c1b49fSNick Terrell &ddict->entropy, ddict->dictContent, ddict->dictSize)), 111*e0c1b49fSNick Terrell dictionary_corrupted, ""); 112*e0c1b49fSNick Terrell ddict->entropyPresent = 1; 113*e0c1b49fSNick Terrell return 0; 114*e0c1b49fSNick Terrell } 115*e0c1b49fSNick Terrell 116*e0c1b49fSNick Terrell 117*e0c1b49fSNick Terrell static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, 118*e0c1b49fSNick Terrell const void* dict, size_t dictSize, 119*e0c1b49fSNick Terrell ZSTD_dictLoadMethod_e dictLoadMethod, 120*e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType) 121*e0c1b49fSNick Terrell { 122*e0c1b49fSNick Terrell if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { 123*e0c1b49fSNick Terrell ddict->dictBuffer = NULL; 124*e0c1b49fSNick Terrell ddict->dictContent = dict; 125*e0c1b49fSNick Terrell if (!dict) dictSize = 0; 126*e0c1b49fSNick Terrell } else { 127*e0c1b49fSNick Terrell void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); 128*e0c1b49fSNick Terrell ddict->dictBuffer = internalBuffer; 129*e0c1b49fSNick Terrell ddict->dictContent = internalBuffer; 130*e0c1b49fSNick Terrell if (!internalBuffer) return ERROR(memory_allocation); 131*e0c1b49fSNick Terrell ZSTD_memcpy(internalBuffer, dict, dictSize); 132*e0c1b49fSNick Terrell } 133*e0c1b49fSNick Terrell ddict->dictSize = dictSize; 134*e0c1b49fSNick Terrell ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ 135*e0c1b49fSNick Terrell 136*e0c1b49fSNick Terrell /* parse dictionary content */ 137*e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); 138*e0c1b49fSNick Terrell 139*e0c1b49fSNick Terrell return 0; 140*e0c1b49fSNick Terrell } 141*e0c1b49fSNick Terrell 142*e0c1b49fSNick Terrell ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, 143*e0c1b49fSNick Terrell ZSTD_dictLoadMethod_e dictLoadMethod, 144*e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType, 145*e0c1b49fSNick Terrell ZSTD_customMem customMem) 146*e0c1b49fSNick Terrell { 147*e0c1b49fSNick Terrell if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; 148*e0c1b49fSNick Terrell 149*e0c1b49fSNick Terrell { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); 150*e0c1b49fSNick Terrell if (ddict == NULL) return NULL; 151*e0c1b49fSNick Terrell ddict->cMem = customMem; 152*e0c1b49fSNick Terrell { size_t const initResult = ZSTD_initDDict_internal(ddict, 153*e0c1b49fSNick Terrell dict, dictSize, 154*e0c1b49fSNick Terrell dictLoadMethod, dictContentType); 155*e0c1b49fSNick Terrell if (ZSTD_isError(initResult)) { 156*e0c1b49fSNick Terrell ZSTD_freeDDict(ddict); 157*e0c1b49fSNick Terrell return NULL; 158*e0c1b49fSNick Terrell } } 159*e0c1b49fSNick Terrell return ddict; 160*e0c1b49fSNick Terrell } 161*e0c1b49fSNick Terrell } 162*e0c1b49fSNick Terrell 163*e0c1b49fSNick Terrell /*! ZSTD_createDDict() : 164*e0c1b49fSNick Terrell * Create a digested dictionary, to start decompression without startup delay. 165*e0c1b49fSNick Terrell * `dict` content is copied inside DDict. 166*e0c1b49fSNick Terrell * Consequently, `dict` can be released after `ZSTD_DDict` creation */ 167*e0c1b49fSNick Terrell ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) 168*e0c1b49fSNick Terrell { 169*e0c1b49fSNick Terrell ZSTD_customMem const allocator = { NULL, NULL, NULL }; 170*e0c1b49fSNick Terrell return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); 171*e0c1b49fSNick Terrell } 172*e0c1b49fSNick Terrell 173*e0c1b49fSNick Terrell /*! ZSTD_createDDict_byReference() : 174*e0c1b49fSNick Terrell * Create a digested dictionary, to start decompression without startup delay. 175*e0c1b49fSNick Terrell * Dictionary content is simply referenced, it will be accessed during decompression. 176*e0c1b49fSNick Terrell * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ 177*e0c1b49fSNick Terrell ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) 178*e0c1b49fSNick Terrell { 179*e0c1b49fSNick Terrell ZSTD_customMem const allocator = { NULL, NULL, NULL }; 180*e0c1b49fSNick Terrell return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); 181*e0c1b49fSNick Terrell } 182*e0c1b49fSNick Terrell 183*e0c1b49fSNick Terrell 184*e0c1b49fSNick Terrell const ZSTD_DDict* ZSTD_initStaticDDict( 185*e0c1b49fSNick Terrell void* sBuffer, size_t sBufferSize, 186*e0c1b49fSNick Terrell const void* dict, size_t dictSize, 187*e0c1b49fSNick Terrell ZSTD_dictLoadMethod_e dictLoadMethod, 188*e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType) 189*e0c1b49fSNick Terrell { 190*e0c1b49fSNick Terrell size_t const neededSpace = sizeof(ZSTD_DDict) 191*e0c1b49fSNick Terrell + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); 192*e0c1b49fSNick Terrell ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; 193*e0c1b49fSNick Terrell assert(sBuffer != NULL); 194*e0c1b49fSNick Terrell assert(dict != NULL); 195*e0c1b49fSNick Terrell if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ 196*e0c1b49fSNick Terrell if (sBufferSize < neededSpace) return NULL; 197*e0c1b49fSNick Terrell if (dictLoadMethod == ZSTD_dlm_byCopy) { 198*e0c1b49fSNick Terrell ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ 199*e0c1b49fSNick Terrell dict = ddict+1; 200*e0c1b49fSNick Terrell } 201*e0c1b49fSNick Terrell if (ZSTD_isError( ZSTD_initDDict_internal(ddict, 202*e0c1b49fSNick Terrell dict, dictSize, 203*e0c1b49fSNick Terrell ZSTD_dlm_byRef, dictContentType) )) 204*e0c1b49fSNick Terrell return NULL; 205*e0c1b49fSNick Terrell return ddict; 206*e0c1b49fSNick Terrell } 207*e0c1b49fSNick Terrell 208*e0c1b49fSNick Terrell 209*e0c1b49fSNick Terrell size_t ZSTD_freeDDict(ZSTD_DDict* ddict) 210*e0c1b49fSNick Terrell { 211*e0c1b49fSNick Terrell if (ddict==NULL) return 0; /* support free on NULL */ 212*e0c1b49fSNick Terrell { ZSTD_customMem const cMem = ddict->cMem; 213*e0c1b49fSNick Terrell ZSTD_customFree(ddict->dictBuffer, cMem); 214*e0c1b49fSNick Terrell ZSTD_customFree(ddict, cMem); 215*e0c1b49fSNick Terrell return 0; 216*e0c1b49fSNick Terrell } 217*e0c1b49fSNick Terrell } 218*e0c1b49fSNick Terrell 219*e0c1b49fSNick Terrell /*! ZSTD_estimateDDictSize() : 220*e0c1b49fSNick Terrell * Estimate amount of memory that will be needed to create a dictionary for decompression. 221*e0c1b49fSNick Terrell * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ 222*e0c1b49fSNick Terrell size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) 223*e0c1b49fSNick Terrell { 224*e0c1b49fSNick Terrell return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); 225*e0c1b49fSNick Terrell } 226*e0c1b49fSNick Terrell 227*e0c1b49fSNick Terrell size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) 228*e0c1b49fSNick Terrell { 229*e0c1b49fSNick Terrell if (ddict==NULL) return 0; /* support sizeof on NULL */ 230*e0c1b49fSNick Terrell return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; 231*e0c1b49fSNick Terrell } 232*e0c1b49fSNick Terrell 233*e0c1b49fSNick Terrell /*! ZSTD_getDictID_fromDDict() : 234*e0c1b49fSNick Terrell * Provides the dictID of the dictionary loaded into `ddict`. 235*e0c1b49fSNick Terrell * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. 236*e0c1b49fSNick Terrell * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ 237*e0c1b49fSNick Terrell unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) 238*e0c1b49fSNick Terrell { 239*e0c1b49fSNick Terrell if (ddict==NULL) return 0; 240*e0c1b49fSNick Terrell return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); 241*e0c1b49fSNick Terrell } 242