1*e0c1b49fSNick Terrell /* 2*e0c1b49fSNick Terrell * Copyright (c) Yann Collet, Facebook, Inc. 3*e0c1b49fSNick Terrell * All rights reserved. 4*e0c1b49fSNick Terrell * 5*e0c1b49fSNick Terrell * This source code is licensed under both the BSD-style license (found in the 6*e0c1b49fSNick Terrell * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*e0c1b49fSNick Terrell * in the COPYING file in the root directory of this source tree). 8*e0c1b49fSNick Terrell * You may select, at your option, one of the above-listed licenses. 9*e0c1b49fSNick Terrell */ 10*e0c1b49fSNick Terrell 11*e0c1b49fSNick Terrell #include "zstd_compress_internal.h" 12*e0c1b49fSNick Terrell #include "zstd_lazy.h" 13*e0c1b49fSNick Terrell 14*e0c1b49fSNick Terrell 15*e0c1b49fSNick Terrell /*-************************************* 16*e0c1b49fSNick Terrell * Binary Tree search 17*e0c1b49fSNick Terrell ***************************************/ 18*e0c1b49fSNick Terrell 19*e0c1b49fSNick Terrell static void 20*e0c1b49fSNick Terrell ZSTD_updateDUBT(ZSTD_matchState_t* ms, 21*e0c1b49fSNick Terrell const BYTE* ip, const BYTE* iend, 22*e0c1b49fSNick Terrell U32 mls) 23*e0c1b49fSNick Terrell { 24*e0c1b49fSNick Terrell const ZSTD_compressionParameters* const cParams = &ms->cParams; 25*e0c1b49fSNick Terrell U32* const hashTable = ms->hashTable; 26*e0c1b49fSNick Terrell U32 const hashLog = cParams->hashLog; 27*e0c1b49fSNick Terrell 28*e0c1b49fSNick Terrell U32* const bt = ms->chainTable; 29*e0c1b49fSNick Terrell U32 const btLog = cParams->chainLog - 1; 30*e0c1b49fSNick Terrell U32 const btMask = (1 << btLog) - 1; 31*e0c1b49fSNick Terrell 32*e0c1b49fSNick Terrell const BYTE* const base = ms->window.base; 33*e0c1b49fSNick Terrell U32 const target = (U32)(ip - base); 34*e0c1b49fSNick Terrell U32 idx = ms->nextToUpdate; 35*e0c1b49fSNick Terrell 36*e0c1b49fSNick Terrell if (idx != target) 37*e0c1b49fSNick Terrell DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", 38*e0c1b49fSNick Terrell idx, target, ms->window.dictLimit); 39*e0c1b49fSNick Terrell assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ 40*e0c1b49fSNick Terrell (void)iend; 41*e0c1b49fSNick Terrell 42*e0c1b49fSNick Terrell assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ 43*e0c1b49fSNick Terrell for ( ; idx < target ; idx++) { 44*e0c1b49fSNick Terrell size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ 45*e0c1b49fSNick Terrell U32 const matchIndex = hashTable[h]; 46*e0c1b49fSNick Terrell 47*e0c1b49fSNick Terrell U32* const nextCandidatePtr = bt + 2*(idx&btMask); 48*e0c1b49fSNick Terrell U32* const sortMarkPtr = nextCandidatePtr + 1; 49*e0c1b49fSNick Terrell 50*e0c1b49fSNick Terrell DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); 51*e0c1b49fSNick Terrell hashTable[h] = idx; /* Update Hash Table */ 52*e0c1b49fSNick Terrell *nextCandidatePtr = matchIndex; /* update BT like a chain */ 53*e0c1b49fSNick Terrell *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; 54*e0c1b49fSNick Terrell } 55*e0c1b49fSNick Terrell ms->nextToUpdate = target; 56*e0c1b49fSNick Terrell } 57*e0c1b49fSNick Terrell 58*e0c1b49fSNick Terrell 59*e0c1b49fSNick Terrell /* ZSTD_insertDUBT1() : 60*e0c1b49fSNick Terrell * sort one already inserted but unsorted position 61*e0c1b49fSNick Terrell * assumption : curr >= btlow == (curr - btmask) 62*e0c1b49fSNick Terrell * doesn't fail */ 63*e0c1b49fSNick Terrell static void 64*e0c1b49fSNick Terrell ZSTD_insertDUBT1(ZSTD_matchState_t* ms, 65*e0c1b49fSNick Terrell U32 curr, const BYTE* inputEnd, 66*e0c1b49fSNick Terrell U32 nbCompares, U32 btLow, 67*e0c1b49fSNick Terrell const ZSTD_dictMode_e dictMode) 68*e0c1b49fSNick Terrell { 69*e0c1b49fSNick Terrell const ZSTD_compressionParameters* const cParams = &ms->cParams; 70*e0c1b49fSNick Terrell U32* const bt = ms->chainTable; 71*e0c1b49fSNick Terrell U32 const btLog = cParams->chainLog - 1; 72*e0c1b49fSNick Terrell U32 const btMask = (1 << btLog) - 1; 73*e0c1b49fSNick Terrell size_t commonLengthSmaller=0, commonLengthLarger=0; 74*e0c1b49fSNick Terrell const BYTE* const base = ms->window.base; 75*e0c1b49fSNick Terrell const BYTE* const dictBase = ms->window.dictBase; 76*e0c1b49fSNick Terrell const U32 dictLimit = ms->window.dictLimit; 77*e0c1b49fSNick Terrell const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr; 78*e0c1b49fSNick Terrell const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit; 79*e0c1b49fSNick Terrell const BYTE* const dictEnd = dictBase + dictLimit; 80*e0c1b49fSNick Terrell const BYTE* const prefixStart = base + dictLimit; 81*e0c1b49fSNick Terrell const BYTE* match; 82*e0c1b49fSNick Terrell U32* smallerPtr = bt + 2*(curr&btMask); 83*e0c1b49fSNick Terrell U32* largerPtr = smallerPtr + 1; 84*e0c1b49fSNick Terrell U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ 85*e0c1b49fSNick Terrell U32 dummy32; /* to be nullified at the end */ 86*e0c1b49fSNick Terrell U32 const windowValid = ms->window.lowLimit; 87*e0c1b49fSNick Terrell U32 const maxDistance = 1U << cParams->windowLog; 88*e0c1b49fSNick Terrell U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid; 89*e0c1b49fSNick Terrell 90*e0c1b49fSNick Terrell 91*e0c1b49fSNick Terrell DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", 92*e0c1b49fSNick Terrell curr, dictLimit, windowLow); 93*e0c1b49fSNick Terrell assert(curr >= btLow); 94*e0c1b49fSNick Terrell assert(ip < iend); /* condition for ZSTD_count */ 95*e0c1b49fSNick Terrell 96*e0c1b49fSNick Terrell for (; nbCompares && (matchIndex > windowLow); --nbCompares) { 97*e0c1b49fSNick Terrell U32* const nextPtr = bt + 2*(matchIndex & btMask); 98*e0c1b49fSNick Terrell size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 99*e0c1b49fSNick Terrell assert(matchIndex < curr); 100*e0c1b49fSNick Terrell /* note : all candidates are now supposed sorted, 101*e0c1b49fSNick Terrell * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK 102*e0c1b49fSNick Terrell * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ 103*e0c1b49fSNick Terrell 104*e0c1b49fSNick Terrell if ( (dictMode != ZSTD_extDict) 105*e0c1b49fSNick Terrell || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ 106*e0c1b49fSNick Terrell || (curr < dictLimit) /* both in extDict */) { 107*e0c1b49fSNick Terrell const BYTE* const mBase = ( (dictMode != ZSTD_extDict) 108*e0c1b49fSNick Terrell || (matchIndex+matchLength >= dictLimit)) ? 109*e0c1b49fSNick Terrell base : dictBase; 110*e0c1b49fSNick Terrell assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ 111*e0c1b49fSNick Terrell || (curr < dictLimit) ); 112*e0c1b49fSNick Terrell match = mBase + matchIndex; 113*e0c1b49fSNick Terrell matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); 114*e0c1b49fSNick Terrell } else { 115*e0c1b49fSNick Terrell match = dictBase + matchIndex; 116*e0c1b49fSNick Terrell matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); 117*e0c1b49fSNick Terrell if (matchIndex+matchLength >= dictLimit) 118*e0c1b49fSNick Terrell match = base + matchIndex; /* preparation for next read of match[matchLength] */ 119*e0c1b49fSNick Terrell } 120*e0c1b49fSNick Terrell 121*e0c1b49fSNick Terrell DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", 122*e0c1b49fSNick Terrell curr, matchIndex, (U32)matchLength); 123*e0c1b49fSNick Terrell 124*e0c1b49fSNick Terrell if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ 125*e0c1b49fSNick Terrell break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ 126*e0c1b49fSNick Terrell } 127*e0c1b49fSNick Terrell 128*e0c1b49fSNick Terrell if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ 129*e0c1b49fSNick Terrell /* match is smaller than current */ 130*e0c1b49fSNick Terrell *smallerPtr = matchIndex; /* update smaller idx */ 131*e0c1b49fSNick Terrell commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 132*e0c1b49fSNick Terrell if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ 133*e0c1b49fSNick Terrell DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", 134*e0c1b49fSNick Terrell matchIndex, btLow, nextPtr[1]); 135*e0c1b49fSNick Terrell smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ 136*e0c1b49fSNick Terrell matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ 137*e0c1b49fSNick Terrell } else { 138*e0c1b49fSNick Terrell /* match is larger than current */ 139*e0c1b49fSNick Terrell *largerPtr = matchIndex; 140*e0c1b49fSNick Terrell commonLengthLarger = matchLength; 141*e0c1b49fSNick Terrell if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ 142*e0c1b49fSNick Terrell DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", 143*e0c1b49fSNick Terrell matchIndex, btLow, nextPtr[0]); 144*e0c1b49fSNick Terrell largerPtr = nextPtr; 145*e0c1b49fSNick Terrell matchIndex = nextPtr[0]; 146*e0c1b49fSNick Terrell } } 147*e0c1b49fSNick Terrell 148*e0c1b49fSNick Terrell *smallerPtr = *largerPtr = 0; 149*e0c1b49fSNick Terrell } 150*e0c1b49fSNick Terrell 151*e0c1b49fSNick Terrell 152*e0c1b49fSNick Terrell static size_t 153*e0c1b49fSNick Terrell ZSTD_DUBT_findBetterDictMatch ( 154*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, 155*e0c1b49fSNick Terrell const BYTE* const ip, const BYTE* const iend, 156*e0c1b49fSNick Terrell size_t* offsetPtr, 157*e0c1b49fSNick Terrell size_t bestLength, 158*e0c1b49fSNick Terrell U32 nbCompares, 159*e0c1b49fSNick Terrell U32 const mls, 160*e0c1b49fSNick Terrell const ZSTD_dictMode_e dictMode) 161*e0c1b49fSNick Terrell { 162*e0c1b49fSNick Terrell const ZSTD_matchState_t * const dms = ms->dictMatchState; 163*e0c1b49fSNick Terrell const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; 164*e0c1b49fSNick Terrell const U32 * const dictHashTable = dms->hashTable; 165*e0c1b49fSNick Terrell U32 const hashLog = dmsCParams->hashLog; 166*e0c1b49fSNick Terrell size_t const h = ZSTD_hashPtr(ip, hashLog, mls); 167*e0c1b49fSNick Terrell U32 dictMatchIndex = dictHashTable[h]; 168*e0c1b49fSNick Terrell 169*e0c1b49fSNick Terrell const BYTE* const base = ms->window.base; 170*e0c1b49fSNick Terrell const BYTE* const prefixStart = base + ms->window.dictLimit; 171*e0c1b49fSNick Terrell U32 const curr = (U32)(ip-base); 172*e0c1b49fSNick Terrell const BYTE* const dictBase = dms->window.base; 173*e0c1b49fSNick Terrell const BYTE* const dictEnd = dms->window.nextSrc; 174*e0c1b49fSNick Terrell U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); 175*e0c1b49fSNick Terrell U32 const dictLowLimit = dms->window.lowLimit; 176*e0c1b49fSNick Terrell U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; 177*e0c1b49fSNick Terrell 178*e0c1b49fSNick Terrell U32* const dictBt = dms->chainTable; 179*e0c1b49fSNick Terrell U32 const btLog = dmsCParams->chainLog - 1; 180*e0c1b49fSNick Terrell U32 const btMask = (1 << btLog) - 1; 181*e0c1b49fSNick Terrell U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; 182*e0c1b49fSNick Terrell 183*e0c1b49fSNick Terrell size_t commonLengthSmaller=0, commonLengthLarger=0; 184*e0c1b49fSNick Terrell 185*e0c1b49fSNick Terrell (void)dictMode; 186*e0c1b49fSNick Terrell assert(dictMode == ZSTD_dictMatchState); 187*e0c1b49fSNick Terrell 188*e0c1b49fSNick Terrell for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) { 189*e0c1b49fSNick Terrell U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); 190*e0c1b49fSNick Terrell size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 191*e0c1b49fSNick Terrell const BYTE* match = dictBase + dictMatchIndex; 192*e0c1b49fSNick Terrell matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); 193*e0c1b49fSNick Terrell if (dictMatchIndex+matchLength >= dictHighLimit) 194*e0c1b49fSNick Terrell match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ 195*e0c1b49fSNick Terrell 196*e0c1b49fSNick Terrell if (matchLength > bestLength) { 197*e0c1b49fSNick Terrell U32 matchIndex = dictMatchIndex + dictIndexDelta; 198*e0c1b49fSNick Terrell if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { 199*e0c1b49fSNick Terrell DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", 200*e0c1b49fSNick Terrell curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex); 201*e0c1b49fSNick Terrell bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; 202*e0c1b49fSNick Terrell } 203*e0c1b49fSNick Terrell if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ 204*e0c1b49fSNick Terrell break; /* drop, to guarantee consistency (miss a little bit of compression) */ 205*e0c1b49fSNick Terrell } 206*e0c1b49fSNick Terrell } 207*e0c1b49fSNick Terrell 208*e0c1b49fSNick Terrell if (match[matchLength] < ip[matchLength]) { 209*e0c1b49fSNick Terrell if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ 210*e0c1b49fSNick Terrell commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 211*e0c1b49fSNick Terrell dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ 212*e0c1b49fSNick Terrell } else { 213*e0c1b49fSNick Terrell /* match is larger than current */ 214*e0c1b49fSNick Terrell if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ 215*e0c1b49fSNick Terrell commonLengthLarger = matchLength; 216*e0c1b49fSNick Terrell dictMatchIndex = nextPtr[0]; 217*e0c1b49fSNick Terrell } 218*e0c1b49fSNick Terrell } 219*e0c1b49fSNick Terrell 220*e0c1b49fSNick Terrell if (bestLength >= MINMATCH) { 221*e0c1b49fSNick Terrell U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; 222*e0c1b49fSNick Terrell DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", 223*e0c1b49fSNick Terrell curr, (U32)bestLength, (U32)*offsetPtr, mIndex); 224*e0c1b49fSNick Terrell } 225*e0c1b49fSNick Terrell return bestLength; 226*e0c1b49fSNick Terrell 227*e0c1b49fSNick Terrell } 228*e0c1b49fSNick Terrell 229*e0c1b49fSNick Terrell 230*e0c1b49fSNick Terrell static size_t 231*e0c1b49fSNick Terrell ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, 232*e0c1b49fSNick Terrell const BYTE* const ip, const BYTE* const iend, 233*e0c1b49fSNick Terrell size_t* offsetPtr, 234*e0c1b49fSNick Terrell U32 const mls, 235*e0c1b49fSNick Terrell const ZSTD_dictMode_e dictMode) 236*e0c1b49fSNick Terrell { 237*e0c1b49fSNick Terrell const ZSTD_compressionParameters* const cParams = &ms->cParams; 238*e0c1b49fSNick Terrell U32* const hashTable = ms->hashTable; 239*e0c1b49fSNick Terrell U32 const hashLog = cParams->hashLog; 240*e0c1b49fSNick Terrell size_t const h = ZSTD_hashPtr(ip, hashLog, mls); 241*e0c1b49fSNick Terrell U32 matchIndex = hashTable[h]; 242*e0c1b49fSNick Terrell 243*e0c1b49fSNick Terrell const BYTE* const base = ms->window.base; 244*e0c1b49fSNick Terrell U32 const curr = (U32)(ip-base); 245*e0c1b49fSNick Terrell U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); 246*e0c1b49fSNick Terrell 247*e0c1b49fSNick Terrell U32* const bt = ms->chainTable; 248*e0c1b49fSNick Terrell U32 const btLog = cParams->chainLog - 1; 249*e0c1b49fSNick Terrell U32 const btMask = (1 << btLog) - 1; 250*e0c1b49fSNick Terrell U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; 251*e0c1b49fSNick Terrell U32 const unsortLimit = MAX(btLow, windowLow); 252*e0c1b49fSNick Terrell 253*e0c1b49fSNick Terrell U32* nextCandidate = bt + 2*(matchIndex&btMask); 254*e0c1b49fSNick Terrell U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; 255*e0c1b49fSNick Terrell U32 nbCompares = 1U << cParams->searchLog; 256*e0c1b49fSNick Terrell U32 nbCandidates = nbCompares; 257*e0c1b49fSNick Terrell U32 previousCandidate = 0; 258*e0c1b49fSNick Terrell 259*e0c1b49fSNick Terrell DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr); 260*e0c1b49fSNick Terrell assert(ip <= iend-8); /* required for h calculation */ 261*e0c1b49fSNick Terrell assert(dictMode != ZSTD_dedicatedDictSearch); 262*e0c1b49fSNick Terrell 263*e0c1b49fSNick Terrell /* reach end of unsorted candidates list */ 264*e0c1b49fSNick Terrell while ( (matchIndex > unsortLimit) 265*e0c1b49fSNick Terrell && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) 266*e0c1b49fSNick Terrell && (nbCandidates > 1) ) { 267*e0c1b49fSNick Terrell DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", 268*e0c1b49fSNick Terrell matchIndex); 269*e0c1b49fSNick Terrell *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ 270*e0c1b49fSNick Terrell previousCandidate = matchIndex; 271*e0c1b49fSNick Terrell matchIndex = *nextCandidate; 272*e0c1b49fSNick Terrell nextCandidate = bt + 2*(matchIndex&btMask); 273*e0c1b49fSNick Terrell unsortedMark = bt + 2*(matchIndex&btMask) + 1; 274*e0c1b49fSNick Terrell nbCandidates --; 275*e0c1b49fSNick Terrell } 276*e0c1b49fSNick Terrell 277*e0c1b49fSNick Terrell /* nullify last candidate if it's still unsorted 278*e0c1b49fSNick Terrell * simplification, detrimental to compression ratio, beneficial for speed */ 279*e0c1b49fSNick Terrell if ( (matchIndex > unsortLimit) 280*e0c1b49fSNick Terrell && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { 281*e0c1b49fSNick Terrell DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", 282*e0c1b49fSNick Terrell matchIndex); 283*e0c1b49fSNick Terrell *nextCandidate = *unsortedMark = 0; 284*e0c1b49fSNick Terrell } 285*e0c1b49fSNick Terrell 286*e0c1b49fSNick Terrell /* batch sort stacked candidates */ 287*e0c1b49fSNick Terrell matchIndex = previousCandidate; 288*e0c1b49fSNick Terrell while (matchIndex) { /* will end on matchIndex == 0 */ 289*e0c1b49fSNick Terrell U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; 290*e0c1b49fSNick Terrell U32 const nextCandidateIdx = *nextCandidateIdxPtr; 291*e0c1b49fSNick Terrell ZSTD_insertDUBT1(ms, matchIndex, iend, 292*e0c1b49fSNick Terrell nbCandidates, unsortLimit, dictMode); 293*e0c1b49fSNick Terrell matchIndex = nextCandidateIdx; 294*e0c1b49fSNick Terrell nbCandidates++; 295*e0c1b49fSNick Terrell } 296*e0c1b49fSNick Terrell 297*e0c1b49fSNick Terrell /* find longest match */ 298*e0c1b49fSNick Terrell { size_t commonLengthSmaller = 0, commonLengthLarger = 0; 299*e0c1b49fSNick Terrell const BYTE* const dictBase = ms->window.dictBase; 300*e0c1b49fSNick Terrell const U32 dictLimit = ms->window.dictLimit; 301*e0c1b49fSNick Terrell const BYTE* const dictEnd = dictBase + dictLimit; 302*e0c1b49fSNick Terrell const BYTE* const prefixStart = base + dictLimit; 303*e0c1b49fSNick Terrell U32* smallerPtr = bt + 2*(curr&btMask); 304*e0c1b49fSNick Terrell U32* largerPtr = bt + 2*(curr&btMask) + 1; 305*e0c1b49fSNick Terrell U32 matchEndIdx = curr + 8 + 1; 306*e0c1b49fSNick Terrell U32 dummy32; /* to be nullified at the end */ 307*e0c1b49fSNick Terrell size_t bestLength = 0; 308*e0c1b49fSNick Terrell 309*e0c1b49fSNick Terrell matchIndex = hashTable[h]; 310*e0c1b49fSNick Terrell hashTable[h] = curr; /* Update Hash Table */ 311*e0c1b49fSNick Terrell 312*e0c1b49fSNick Terrell for (; nbCompares && (matchIndex > windowLow); --nbCompares) { 313*e0c1b49fSNick Terrell U32* const nextPtr = bt + 2*(matchIndex & btMask); 314*e0c1b49fSNick Terrell size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 315*e0c1b49fSNick Terrell const BYTE* match; 316*e0c1b49fSNick Terrell 317*e0c1b49fSNick Terrell if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { 318*e0c1b49fSNick Terrell match = base + matchIndex; 319*e0c1b49fSNick Terrell matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); 320*e0c1b49fSNick Terrell } else { 321*e0c1b49fSNick Terrell match = dictBase + matchIndex; 322*e0c1b49fSNick Terrell matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); 323*e0c1b49fSNick Terrell if (matchIndex+matchLength >= dictLimit) 324*e0c1b49fSNick Terrell match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ 325*e0c1b49fSNick Terrell } 326*e0c1b49fSNick Terrell 327*e0c1b49fSNick Terrell if (matchLength > bestLength) { 328*e0c1b49fSNick Terrell if (matchLength > matchEndIdx - matchIndex) 329*e0c1b49fSNick Terrell matchEndIdx = matchIndex + (U32)matchLength; 330*e0c1b49fSNick Terrell if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) 331*e0c1b49fSNick Terrell bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; 332*e0c1b49fSNick Terrell if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ 333*e0c1b49fSNick Terrell if (dictMode == ZSTD_dictMatchState) { 334*e0c1b49fSNick Terrell nbCompares = 0; /* in addition to avoiding checking any 335*e0c1b49fSNick Terrell * further in this loop, make sure we 336*e0c1b49fSNick Terrell * skip checking in the dictionary. */ 337*e0c1b49fSNick Terrell } 338*e0c1b49fSNick Terrell break; /* drop, to guarantee consistency (miss a little bit of compression) */ 339*e0c1b49fSNick Terrell } 340*e0c1b49fSNick Terrell } 341*e0c1b49fSNick Terrell 342*e0c1b49fSNick Terrell if (match[matchLength] < ip[matchLength]) { 343*e0c1b49fSNick Terrell /* match is smaller than current */ 344*e0c1b49fSNick Terrell *smallerPtr = matchIndex; /* update smaller idx */ 345*e0c1b49fSNick Terrell commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 346*e0c1b49fSNick Terrell if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 347*e0c1b49fSNick Terrell smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ 348*e0c1b49fSNick Terrell matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ 349*e0c1b49fSNick Terrell } else { 350*e0c1b49fSNick Terrell /* match is larger than current */ 351*e0c1b49fSNick Terrell *largerPtr = matchIndex; 352*e0c1b49fSNick Terrell commonLengthLarger = matchLength; 353*e0c1b49fSNick Terrell if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 354*e0c1b49fSNick Terrell largerPtr = nextPtr; 355*e0c1b49fSNick Terrell matchIndex = nextPtr[0]; 356*e0c1b49fSNick Terrell } } 357*e0c1b49fSNick Terrell 358*e0c1b49fSNick Terrell *smallerPtr = *largerPtr = 0; 359*e0c1b49fSNick Terrell 360*e0c1b49fSNick Terrell assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */ 361*e0c1b49fSNick Terrell if (dictMode == ZSTD_dictMatchState && nbCompares) { 362*e0c1b49fSNick Terrell bestLength = ZSTD_DUBT_findBetterDictMatch( 363*e0c1b49fSNick Terrell ms, ip, iend, 364*e0c1b49fSNick Terrell offsetPtr, bestLength, nbCompares, 365*e0c1b49fSNick Terrell mls, dictMode); 366*e0c1b49fSNick Terrell } 367*e0c1b49fSNick Terrell 368*e0c1b49fSNick Terrell assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ 369*e0c1b49fSNick Terrell ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ 370*e0c1b49fSNick Terrell if (bestLength >= MINMATCH) { 371*e0c1b49fSNick Terrell U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; 372*e0c1b49fSNick Terrell DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", 373*e0c1b49fSNick Terrell curr, (U32)bestLength, (U32)*offsetPtr, mIndex); 374*e0c1b49fSNick Terrell } 375*e0c1b49fSNick Terrell return bestLength; 376*e0c1b49fSNick Terrell } 377*e0c1b49fSNick Terrell } 378*e0c1b49fSNick Terrell 379*e0c1b49fSNick Terrell 380*e0c1b49fSNick Terrell /* ZSTD_BtFindBestMatch() : Tree updater, providing best match */ 381*e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t 382*e0c1b49fSNick Terrell ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, 383*e0c1b49fSNick Terrell const BYTE* const ip, const BYTE* const iLimit, 384*e0c1b49fSNick Terrell size_t* offsetPtr, 385*e0c1b49fSNick Terrell const U32 mls /* template */, 386*e0c1b49fSNick Terrell const ZSTD_dictMode_e dictMode) 387*e0c1b49fSNick Terrell { 388*e0c1b49fSNick Terrell DEBUGLOG(7, "ZSTD_BtFindBestMatch"); 389*e0c1b49fSNick Terrell if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ 390*e0c1b49fSNick Terrell ZSTD_updateDUBT(ms, ip, iLimit, mls); 391*e0c1b49fSNick Terrell return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); 392*e0c1b49fSNick Terrell } 393*e0c1b49fSNick Terrell 394*e0c1b49fSNick Terrell 395*e0c1b49fSNick Terrell static size_t 396*e0c1b49fSNick Terrell ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, 397*e0c1b49fSNick Terrell const BYTE* ip, const BYTE* const iLimit, 398*e0c1b49fSNick Terrell size_t* offsetPtr) 399*e0c1b49fSNick Terrell { 400*e0c1b49fSNick Terrell switch(ms->cParams.minMatch) 401*e0c1b49fSNick Terrell { 402*e0c1b49fSNick Terrell default : /* includes case 3 */ 403*e0c1b49fSNick Terrell case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); 404*e0c1b49fSNick Terrell case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); 405*e0c1b49fSNick Terrell case 7 : 406*e0c1b49fSNick Terrell case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); 407*e0c1b49fSNick Terrell } 408*e0c1b49fSNick Terrell } 409*e0c1b49fSNick Terrell 410*e0c1b49fSNick Terrell 411*e0c1b49fSNick Terrell static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( 412*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, 413*e0c1b49fSNick Terrell const BYTE* ip, const BYTE* const iLimit, 414*e0c1b49fSNick Terrell size_t* offsetPtr) 415*e0c1b49fSNick Terrell { 416*e0c1b49fSNick Terrell switch(ms->cParams.minMatch) 417*e0c1b49fSNick Terrell { 418*e0c1b49fSNick Terrell default : /* includes case 3 */ 419*e0c1b49fSNick Terrell case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); 420*e0c1b49fSNick Terrell case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); 421*e0c1b49fSNick Terrell case 7 : 422*e0c1b49fSNick Terrell case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); 423*e0c1b49fSNick Terrell } 424*e0c1b49fSNick Terrell } 425*e0c1b49fSNick Terrell 426*e0c1b49fSNick Terrell 427*e0c1b49fSNick Terrell static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( 428*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, 429*e0c1b49fSNick Terrell const BYTE* ip, const BYTE* const iLimit, 430*e0c1b49fSNick Terrell size_t* offsetPtr) 431*e0c1b49fSNick Terrell { 432*e0c1b49fSNick Terrell switch(ms->cParams.minMatch) 433*e0c1b49fSNick Terrell { 434*e0c1b49fSNick Terrell default : /* includes case 3 */ 435*e0c1b49fSNick Terrell case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); 436*e0c1b49fSNick Terrell case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); 437*e0c1b49fSNick Terrell case 7 : 438*e0c1b49fSNick Terrell case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); 439*e0c1b49fSNick Terrell } 440*e0c1b49fSNick Terrell } 441*e0c1b49fSNick Terrell 442*e0c1b49fSNick Terrell 443*e0c1b49fSNick Terrell 444*e0c1b49fSNick Terrell /* ********************************* 445*e0c1b49fSNick Terrell * Hash Chain 446*e0c1b49fSNick Terrell ***********************************/ 447*e0c1b49fSNick Terrell #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] 448*e0c1b49fSNick Terrell 449*e0c1b49fSNick Terrell /* Update chains up to ip (excluded) 450*e0c1b49fSNick Terrell Assumption : always within prefix (i.e. not within extDict) */ 451*e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( 452*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, 453*e0c1b49fSNick Terrell const ZSTD_compressionParameters* const cParams, 454*e0c1b49fSNick Terrell const BYTE* ip, U32 const mls) 455*e0c1b49fSNick Terrell { 456*e0c1b49fSNick Terrell U32* const hashTable = ms->hashTable; 457*e0c1b49fSNick Terrell const U32 hashLog = cParams->hashLog; 458*e0c1b49fSNick Terrell U32* const chainTable = ms->chainTable; 459*e0c1b49fSNick Terrell const U32 chainMask = (1 << cParams->chainLog) - 1; 460*e0c1b49fSNick Terrell const BYTE* const base = ms->window.base; 461*e0c1b49fSNick Terrell const U32 target = (U32)(ip - base); 462*e0c1b49fSNick Terrell U32 idx = ms->nextToUpdate; 463*e0c1b49fSNick Terrell 464*e0c1b49fSNick Terrell while(idx < target) { /* catch up */ 465*e0c1b49fSNick Terrell size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); 466*e0c1b49fSNick Terrell NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; 467*e0c1b49fSNick Terrell hashTable[h] = idx; 468*e0c1b49fSNick Terrell idx++; 469*e0c1b49fSNick Terrell } 470*e0c1b49fSNick Terrell 471*e0c1b49fSNick Terrell ms->nextToUpdate = target; 472*e0c1b49fSNick Terrell return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; 473*e0c1b49fSNick Terrell } 474*e0c1b49fSNick Terrell 475*e0c1b49fSNick Terrell U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { 476*e0c1b49fSNick Terrell const ZSTD_compressionParameters* const cParams = &ms->cParams; 477*e0c1b49fSNick Terrell return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); 478*e0c1b49fSNick Terrell } 479*e0c1b49fSNick Terrell 480*e0c1b49fSNick Terrell void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) 481*e0c1b49fSNick Terrell { 482*e0c1b49fSNick Terrell const BYTE* const base = ms->window.base; 483*e0c1b49fSNick Terrell U32 const target = (U32)(ip - base); 484*e0c1b49fSNick Terrell U32* const hashTable = ms->hashTable; 485*e0c1b49fSNick Terrell U32* const chainTable = ms->chainTable; 486*e0c1b49fSNick Terrell U32 const chainSize = 1 << ms->cParams.chainLog; 487*e0c1b49fSNick Terrell U32 idx = ms->nextToUpdate; 488*e0c1b49fSNick Terrell U32 const minChain = chainSize < target ? target - chainSize : idx; 489*e0c1b49fSNick Terrell U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; 490*e0c1b49fSNick Terrell U32 const cacheSize = bucketSize - 1; 491*e0c1b49fSNick Terrell U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; 492*e0c1b49fSNick Terrell U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts; 493*e0c1b49fSNick Terrell 494*e0c1b49fSNick Terrell /* We know the hashtable is oversized by a factor of `bucketSize`. 495*e0c1b49fSNick Terrell * We are going to temporarily pretend `bucketSize == 1`, keeping only a 496*e0c1b49fSNick Terrell * single entry. We will use the rest of the space to construct a temporary 497*e0c1b49fSNick Terrell * chaintable. 498*e0c1b49fSNick Terrell */ 499*e0c1b49fSNick Terrell U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; 500*e0c1b49fSNick Terrell U32* const tmpHashTable = hashTable; 501*e0c1b49fSNick Terrell U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); 502*e0c1b49fSNick Terrell U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; 503*e0c1b49fSNick Terrell U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx; 504*e0c1b49fSNick Terrell 505*e0c1b49fSNick Terrell U32 hashIdx; 506*e0c1b49fSNick Terrell 507*e0c1b49fSNick Terrell assert(ms->cParams.chainLog <= 24); 508*e0c1b49fSNick Terrell assert(ms->cParams.hashLog >= ms->cParams.chainLog); 509*e0c1b49fSNick Terrell assert(idx != 0); 510*e0c1b49fSNick Terrell assert(tmpMinChain <= minChain); 511*e0c1b49fSNick Terrell 512*e0c1b49fSNick Terrell /* fill conventional hash table and conventional chain table */ 513*e0c1b49fSNick Terrell for ( ; idx < target; idx++) { 514*e0c1b49fSNick Terrell U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch); 515*e0c1b49fSNick Terrell if (idx >= tmpMinChain) { 516*e0c1b49fSNick Terrell tmpChainTable[idx - tmpMinChain] = hashTable[h]; 517*e0c1b49fSNick Terrell } 518*e0c1b49fSNick Terrell tmpHashTable[h] = idx; 519*e0c1b49fSNick Terrell } 520*e0c1b49fSNick Terrell 521*e0c1b49fSNick Terrell /* sort chains into ddss chain table */ 522*e0c1b49fSNick Terrell { 523*e0c1b49fSNick Terrell U32 chainPos = 0; 524*e0c1b49fSNick Terrell for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) { 525*e0c1b49fSNick Terrell U32 count; 526*e0c1b49fSNick Terrell U32 countBeyondMinChain = 0; 527*e0c1b49fSNick Terrell U32 i = tmpHashTable[hashIdx]; 528*e0c1b49fSNick Terrell for (count = 0; i >= tmpMinChain && count < cacheSize; count++) { 529*e0c1b49fSNick Terrell /* skip through the chain to the first position that won't be 530*e0c1b49fSNick Terrell * in the hash cache bucket */ 531*e0c1b49fSNick Terrell if (i < minChain) { 532*e0c1b49fSNick Terrell countBeyondMinChain++; 533*e0c1b49fSNick Terrell } 534*e0c1b49fSNick Terrell i = tmpChainTable[i - tmpMinChain]; 535*e0c1b49fSNick Terrell } 536*e0c1b49fSNick Terrell if (count == cacheSize) { 537*e0c1b49fSNick Terrell for (count = 0; count < chainLimit;) { 538*e0c1b49fSNick Terrell if (i < minChain) { 539*e0c1b49fSNick Terrell if (!i || countBeyondMinChain++ > cacheSize) { 540*e0c1b49fSNick Terrell /* only allow pulling `cacheSize` number of entries 541*e0c1b49fSNick Terrell * into the cache or chainTable beyond `minChain`, 542*e0c1b49fSNick Terrell * to replace the entries pulled out of the 543*e0c1b49fSNick Terrell * chainTable into the cache. This lets us reach 544*e0c1b49fSNick Terrell * back further without increasing the total number 545*e0c1b49fSNick Terrell * of entries in the chainTable, guaranteeing the 546*e0c1b49fSNick Terrell * DDSS chain table will fit into the space 547*e0c1b49fSNick Terrell * allocated for the regular one. */ 548*e0c1b49fSNick Terrell break; 549*e0c1b49fSNick Terrell } 550*e0c1b49fSNick Terrell } 551*e0c1b49fSNick Terrell chainTable[chainPos++] = i; 552*e0c1b49fSNick Terrell count++; 553*e0c1b49fSNick Terrell if (i < tmpMinChain) { 554*e0c1b49fSNick Terrell break; 555*e0c1b49fSNick Terrell } 556*e0c1b49fSNick Terrell i = tmpChainTable[i - tmpMinChain]; 557*e0c1b49fSNick Terrell } 558*e0c1b49fSNick Terrell } else { 559*e0c1b49fSNick Terrell count = 0; 560*e0c1b49fSNick Terrell } 561*e0c1b49fSNick Terrell if (count) { 562*e0c1b49fSNick Terrell tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count; 563*e0c1b49fSNick Terrell } else { 564*e0c1b49fSNick Terrell tmpHashTable[hashIdx] = 0; 565*e0c1b49fSNick Terrell } 566*e0c1b49fSNick Terrell } 567*e0c1b49fSNick Terrell assert(chainPos <= chainSize); /* I believe this is guaranteed... */ 568*e0c1b49fSNick Terrell } 569*e0c1b49fSNick Terrell 570*e0c1b49fSNick Terrell /* move chain pointers into the last entry of each hash bucket */ 571*e0c1b49fSNick Terrell for (hashIdx = (1 << hashLog); hashIdx; ) { 572*e0c1b49fSNick Terrell U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG; 573*e0c1b49fSNick Terrell U32 const chainPackedPointer = tmpHashTable[hashIdx]; 574*e0c1b49fSNick Terrell U32 i; 575*e0c1b49fSNick Terrell for (i = 0; i < cacheSize; i++) { 576*e0c1b49fSNick Terrell hashTable[bucketIdx + i] = 0; 577*e0c1b49fSNick Terrell } 578*e0c1b49fSNick Terrell hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer; 579*e0c1b49fSNick Terrell } 580*e0c1b49fSNick Terrell 581*e0c1b49fSNick Terrell /* fill the buckets of the hash table */ 582*e0c1b49fSNick Terrell for (idx = ms->nextToUpdate; idx < target; idx++) { 583*e0c1b49fSNick Terrell U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch) 584*e0c1b49fSNick Terrell << ZSTD_LAZY_DDSS_BUCKET_LOG; 585*e0c1b49fSNick Terrell U32 i; 586*e0c1b49fSNick Terrell /* Shift hash cache down 1. */ 587*e0c1b49fSNick Terrell for (i = cacheSize - 1; i; i--) 588*e0c1b49fSNick Terrell hashTable[h + i] = hashTable[h + i - 1]; 589*e0c1b49fSNick Terrell hashTable[h] = idx; 590*e0c1b49fSNick Terrell } 591*e0c1b49fSNick Terrell 592*e0c1b49fSNick Terrell ms->nextToUpdate = target; 593*e0c1b49fSNick Terrell } 594*e0c1b49fSNick Terrell 595*e0c1b49fSNick Terrell 596*e0c1b49fSNick Terrell /* inlining is important to hardwire a hot branch (template emulation) */ 597*e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE 598*e0c1b49fSNick Terrell size_t ZSTD_HcFindBestMatch_generic ( 599*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, 600*e0c1b49fSNick Terrell const BYTE* const ip, const BYTE* const iLimit, 601*e0c1b49fSNick Terrell size_t* offsetPtr, 602*e0c1b49fSNick Terrell const U32 mls, const ZSTD_dictMode_e dictMode) 603*e0c1b49fSNick Terrell { 604*e0c1b49fSNick Terrell const ZSTD_compressionParameters* const cParams = &ms->cParams; 605*e0c1b49fSNick Terrell U32* const chainTable = ms->chainTable; 606*e0c1b49fSNick Terrell const U32 chainSize = (1 << cParams->chainLog); 607*e0c1b49fSNick Terrell const U32 chainMask = chainSize-1; 608*e0c1b49fSNick Terrell const BYTE* const base = ms->window.base; 609*e0c1b49fSNick Terrell const BYTE* const dictBase = ms->window.dictBase; 610*e0c1b49fSNick Terrell const U32 dictLimit = ms->window.dictLimit; 611*e0c1b49fSNick Terrell const BYTE* const prefixStart = base + dictLimit; 612*e0c1b49fSNick Terrell const BYTE* const dictEnd = dictBase + dictLimit; 613*e0c1b49fSNick Terrell const U32 curr = (U32)(ip-base); 614*e0c1b49fSNick Terrell const U32 maxDistance = 1U << cParams->windowLog; 615*e0c1b49fSNick Terrell const U32 lowestValid = ms->window.lowLimit; 616*e0c1b49fSNick Terrell const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; 617*e0c1b49fSNick Terrell const U32 isDictionary = (ms->loadedDictEnd != 0); 618*e0c1b49fSNick Terrell const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; 619*e0c1b49fSNick Terrell const U32 minChain = curr > chainSize ? curr - chainSize : 0; 620*e0c1b49fSNick Terrell U32 nbAttempts = 1U << cParams->searchLog; 621*e0c1b49fSNick Terrell size_t ml=4-1; 622*e0c1b49fSNick Terrell 623*e0c1b49fSNick Terrell const ZSTD_matchState_t* const dms = ms->dictMatchState; 624*e0c1b49fSNick Terrell const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch 625*e0c1b49fSNick Terrell ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; 626*e0c1b49fSNick Terrell const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch 627*e0c1b49fSNick Terrell ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; 628*e0c1b49fSNick Terrell 629*e0c1b49fSNick Terrell U32 matchIndex; 630*e0c1b49fSNick Terrell 631*e0c1b49fSNick Terrell if (dictMode == ZSTD_dedicatedDictSearch) { 632*e0c1b49fSNick Terrell const U32* entry = &dms->hashTable[ddsIdx]; 633*e0c1b49fSNick Terrell PREFETCH_L1(entry); 634*e0c1b49fSNick Terrell } 635*e0c1b49fSNick Terrell 636*e0c1b49fSNick Terrell /* HC4 match finder */ 637*e0c1b49fSNick Terrell matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); 638*e0c1b49fSNick Terrell 639*e0c1b49fSNick Terrell for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { 640*e0c1b49fSNick Terrell size_t currentMl=0; 641*e0c1b49fSNick Terrell if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { 642*e0c1b49fSNick Terrell const BYTE* const match = base + matchIndex; 643*e0c1b49fSNick Terrell assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ 644*e0c1b49fSNick Terrell if (match[ml] == ip[ml]) /* potentially better */ 645*e0c1b49fSNick Terrell currentMl = ZSTD_count(ip, match, iLimit); 646*e0c1b49fSNick Terrell } else { 647*e0c1b49fSNick Terrell const BYTE* const match = dictBase + matchIndex; 648*e0c1b49fSNick Terrell assert(match+4 <= dictEnd); 649*e0c1b49fSNick Terrell if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ 650*e0c1b49fSNick Terrell currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; 651*e0c1b49fSNick Terrell } 652*e0c1b49fSNick Terrell 653*e0c1b49fSNick Terrell /* save best solution */ 654*e0c1b49fSNick Terrell if (currentMl > ml) { 655*e0c1b49fSNick Terrell ml = currentMl; 656*e0c1b49fSNick Terrell *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; 657*e0c1b49fSNick Terrell if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ 658*e0c1b49fSNick Terrell } 659*e0c1b49fSNick Terrell 660*e0c1b49fSNick Terrell if (matchIndex <= minChain) break; 661*e0c1b49fSNick Terrell matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); 662*e0c1b49fSNick Terrell } 663*e0c1b49fSNick Terrell 664*e0c1b49fSNick Terrell assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */ 665*e0c1b49fSNick Terrell if (dictMode == ZSTD_dedicatedDictSearch) { 666*e0c1b49fSNick Terrell const U32 ddsLowestIndex = dms->window.dictLimit; 667*e0c1b49fSNick Terrell const BYTE* const ddsBase = dms->window.base; 668*e0c1b49fSNick Terrell const BYTE* const ddsEnd = dms->window.nextSrc; 669*e0c1b49fSNick Terrell const U32 ddsSize = (U32)(ddsEnd - ddsBase); 670*e0c1b49fSNick Terrell const U32 ddsIndexDelta = dictLimit - ddsSize; 671*e0c1b49fSNick Terrell const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); 672*e0c1b49fSNick Terrell const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; 673*e0c1b49fSNick Terrell U32 ddsAttempt; 674*e0c1b49fSNick Terrell 675*e0c1b49fSNick Terrell for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { 676*e0c1b49fSNick Terrell PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); 677*e0c1b49fSNick Terrell } 678*e0c1b49fSNick Terrell 679*e0c1b49fSNick Terrell { 680*e0c1b49fSNick Terrell U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; 681*e0c1b49fSNick Terrell U32 const chainIndex = chainPackedPointer >> 8; 682*e0c1b49fSNick Terrell 683*e0c1b49fSNick Terrell PREFETCH_L1(&dms->chainTable[chainIndex]); 684*e0c1b49fSNick Terrell } 685*e0c1b49fSNick Terrell 686*e0c1b49fSNick Terrell for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { 687*e0c1b49fSNick Terrell size_t currentMl=0; 688*e0c1b49fSNick Terrell const BYTE* match; 689*e0c1b49fSNick Terrell matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; 690*e0c1b49fSNick Terrell match = ddsBase + matchIndex; 691*e0c1b49fSNick Terrell 692*e0c1b49fSNick Terrell if (!matchIndex) { 693*e0c1b49fSNick Terrell return ml; 694*e0c1b49fSNick Terrell } 695*e0c1b49fSNick Terrell 696*e0c1b49fSNick Terrell /* guaranteed by table construction */ 697*e0c1b49fSNick Terrell (void)ddsLowestIndex; 698*e0c1b49fSNick Terrell assert(matchIndex >= ddsLowestIndex); 699*e0c1b49fSNick Terrell assert(match+4 <= ddsEnd); 700*e0c1b49fSNick Terrell if (MEM_read32(match) == MEM_read32(ip)) { 701*e0c1b49fSNick Terrell /* assumption : matchIndex <= dictLimit-4 (by table construction) */ 702*e0c1b49fSNick Terrell currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; 703*e0c1b49fSNick Terrell } 704*e0c1b49fSNick Terrell 705*e0c1b49fSNick Terrell /* save best solution */ 706*e0c1b49fSNick Terrell if (currentMl > ml) { 707*e0c1b49fSNick Terrell ml = currentMl; 708*e0c1b49fSNick Terrell *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; 709*e0c1b49fSNick Terrell if (ip+currentMl == iLimit) { 710*e0c1b49fSNick Terrell /* best possible, avoids read overflow on next attempt */ 711*e0c1b49fSNick Terrell return ml; 712*e0c1b49fSNick Terrell } 713*e0c1b49fSNick Terrell } 714*e0c1b49fSNick Terrell } 715*e0c1b49fSNick Terrell 716*e0c1b49fSNick Terrell { 717*e0c1b49fSNick Terrell U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; 718*e0c1b49fSNick Terrell U32 chainIndex = chainPackedPointer >> 8; 719*e0c1b49fSNick Terrell U32 const chainLength = chainPackedPointer & 0xFF; 720*e0c1b49fSNick Terrell U32 const chainAttempts = nbAttempts - ddsAttempt; 721*e0c1b49fSNick Terrell U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts; 722*e0c1b49fSNick Terrell U32 chainAttempt; 723*e0c1b49fSNick Terrell 724*e0c1b49fSNick Terrell for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { 725*e0c1b49fSNick Terrell PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); 726*e0c1b49fSNick Terrell } 727*e0c1b49fSNick Terrell 728*e0c1b49fSNick Terrell for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { 729*e0c1b49fSNick Terrell size_t currentMl=0; 730*e0c1b49fSNick Terrell const BYTE* match; 731*e0c1b49fSNick Terrell matchIndex = dms->chainTable[chainIndex]; 732*e0c1b49fSNick Terrell match = ddsBase + matchIndex; 733*e0c1b49fSNick Terrell 734*e0c1b49fSNick Terrell /* guaranteed by table construction */ 735*e0c1b49fSNick Terrell assert(matchIndex >= ddsLowestIndex); 736*e0c1b49fSNick Terrell assert(match+4 <= ddsEnd); 737*e0c1b49fSNick Terrell if (MEM_read32(match) == MEM_read32(ip)) { 738*e0c1b49fSNick Terrell /* assumption : matchIndex <= dictLimit-4 (by table construction) */ 739*e0c1b49fSNick Terrell currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; 740*e0c1b49fSNick Terrell } 741*e0c1b49fSNick Terrell 742*e0c1b49fSNick Terrell /* save best solution */ 743*e0c1b49fSNick Terrell if (currentMl > ml) { 744*e0c1b49fSNick Terrell ml = currentMl; 745*e0c1b49fSNick Terrell *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; 746*e0c1b49fSNick Terrell if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ 747*e0c1b49fSNick Terrell } 748*e0c1b49fSNick Terrell } 749*e0c1b49fSNick Terrell } 750*e0c1b49fSNick Terrell } else if (dictMode == ZSTD_dictMatchState) { 751*e0c1b49fSNick Terrell const U32* const dmsChainTable = dms->chainTable; 752*e0c1b49fSNick Terrell const U32 dmsChainSize = (1 << dms->cParams.chainLog); 753*e0c1b49fSNick Terrell const U32 dmsChainMask = dmsChainSize - 1; 754*e0c1b49fSNick Terrell const U32 dmsLowestIndex = dms->window.dictLimit; 755*e0c1b49fSNick Terrell const BYTE* const dmsBase = dms->window.base; 756*e0c1b49fSNick Terrell const BYTE* const dmsEnd = dms->window.nextSrc; 757*e0c1b49fSNick Terrell const U32 dmsSize = (U32)(dmsEnd - dmsBase); 758*e0c1b49fSNick Terrell const U32 dmsIndexDelta = dictLimit - dmsSize; 759*e0c1b49fSNick Terrell const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0; 760*e0c1b49fSNick Terrell 761*e0c1b49fSNick Terrell matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; 762*e0c1b49fSNick Terrell 763*e0c1b49fSNick Terrell for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { 764*e0c1b49fSNick Terrell size_t currentMl=0; 765*e0c1b49fSNick Terrell const BYTE* const match = dmsBase + matchIndex; 766*e0c1b49fSNick Terrell assert(match+4 <= dmsEnd); 767*e0c1b49fSNick Terrell if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ 768*e0c1b49fSNick Terrell currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; 769*e0c1b49fSNick Terrell 770*e0c1b49fSNick Terrell /* save best solution */ 771*e0c1b49fSNick Terrell if (currentMl > ml) { 772*e0c1b49fSNick Terrell ml = currentMl; 773*e0c1b49fSNick Terrell *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; 774*e0c1b49fSNick Terrell if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ 775*e0c1b49fSNick Terrell } 776*e0c1b49fSNick Terrell 777*e0c1b49fSNick Terrell if (matchIndex <= dmsMinChain) break; 778*e0c1b49fSNick Terrell 779*e0c1b49fSNick Terrell matchIndex = dmsChainTable[matchIndex & dmsChainMask]; 780*e0c1b49fSNick Terrell } 781*e0c1b49fSNick Terrell } 782*e0c1b49fSNick Terrell 783*e0c1b49fSNick Terrell return ml; 784*e0c1b49fSNick Terrell } 785*e0c1b49fSNick Terrell 786*e0c1b49fSNick Terrell 787*e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( 788*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, 789*e0c1b49fSNick Terrell const BYTE* ip, const BYTE* const iLimit, 790*e0c1b49fSNick Terrell size_t* offsetPtr) 791*e0c1b49fSNick Terrell { 792*e0c1b49fSNick Terrell switch(ms->cParams.minMatch) 793*e0c1b49fSNick Terrell { 794*e0c1b49fSNick Terrell default : /* includes case 3 */ 795*e0c1b49fSNick Terrell case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); 796*e0c1b49fSNick Terrell case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); 797*e0c1b49fSNick Terrell case 7 : 798*e0c1b49fSNick Terrell case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); 799*e0c1b49fSNick Terrell } 800*e0c1b49fSNick Terrell } 801*e0c1b49fSNick Terrell 802*e0c1b49fSNick Terrell 803*e0c1b49fSNick Terrell static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( 804*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, 805*e0c1b49fSNick Terrell const BYTE* ip, const BYTE* const iLimit, 806*e0c1b49fSNick Terrell size_t* offsetPtr) 807*e0c1b49fSNick Terrell { 808*e0c1b49fSNick Terrell switch(ms->cParams.minMatch) 809*e0c1b49fSNick Terrell { 810*e0c1b49fSNick Terrell default : /* includes case 3 */ 811*e0c1b49fSNick Terrell case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); 812*e0c1b49fSNick Terrell case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); 813*e0c1b49fSNick Terrell case 7 : 814*e0c1b49fSNick Terrell case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); 815*e0c1b49fSNick Terrell } 816*e0c1b49fSNick Terrell } 817*e0c1b49fSNick Terrell 818*e0c1b49fSNick Terrell 819*e0c1b49fSNick Terrell static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS ( 820*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, 821*e0c1b49fSNick Terrell const BYTE* ip, const BYTE* const iLimit, 822*e0c1b49fSNick Terrell size_t* offsetPtr) 823*e0c1b49fSNick Terrell { 824*e0c1b49fSNick Terrell switch(ms->cParams.minMatch) 825*e0c1b49fSNick Terrell { 826*e0c1b49fSNick Terrell default : /* includes case 3 */ 827*e0c1b49fSNick Terrell case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch); 828*e0c1b49fSNick Terrell case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch); 829*e0c1b49fSNick Terrell case 7 : 830*e0c1b49fSNick Terrell case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch); 831*e0c1b49fSNick Terrell } 832*e0c1b49fSNick Terrell } 833*e0c1b49fSNick Terrell 834*e0c1b49fSNick Terrell 835*e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( 836*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, 837*e0c1b49fSNick Terrell const BYTE* ip, const BYTE* const iLimit, 838*e0c1b49fSNick Terrell size_t* offsetPtr) 839*e0c1b49fSNick Terrell { 840*e0c1b49fSNick Terrell switch(ms->cParams.minMatch) 841*e0c1b49fSNick Terrell { 842*e0c1b49fSNick Terrell default : /* includes case 3 */ 843*e0c1b49fSNick Terrell case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); 844*e0c1b49fSNick Terrell case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); 845*e0c1b49fSNick Terrell case 7 : 846*e0c1b49fSNick Terrell case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); 847*e0c1b49fSNick Terrell } 848*e0c1b49fSNick Terrell } 849*e0c1b49fSNick Terrell 850*e0c1b49fSNick Terrell 851*e0c1b49fSNick Terrell /* ******************************* 852*e0c1b49fSNick Terrell * Common parser - lazy strategy 853*e0c1b49fSNick Terrell *********************************/ 854*e0c1b49fSNick Terrell typedef enum { search_hashChain, search_binaryTree } searchMethod_e; 855*e0c1b49fSNick Terrell 856*e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE size_t 857*e0c1b49fSNick Terrell ZSTD_compressBlock_lazy_generic( 858*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, 859*e0c1b49fSNick Terrell U32 rep[ZSTD_REP_NUM], 860*e0c1b49fSNick Terrell const void* src, size_t srcSize, 861*e0c1b49fSNick Terrell const searchMethod_e searchMethod, const U32 depth, 862*e0c1b49fSNick Terrell ZSTD_dictMode_e const dictMode) 863*e0c1b49fSNick Terrell { 864*e0c1b49fSNick Terrell const BYTE* const istart = (const BYTE*)src; 865*e0c1b49fSNick Terrell const BYTE* ip = istart; 866*e0c1b49fSNick Terrell const BYTE* anchor = istart; 867*e0c1b49fSNick Terrell const BYTE* const iend = istart + srcSize; 868*e0c1b49fSNick Terrell const BYTE* const ilimit = iend - 8; 869*e0c1b49fSNick Terrell const BYTE* const base = ms->window.base; 870*e0c1b49fSNick Terrell const U32 prefixLowestIndex = ms->window.dictLimit; 871*e0c1b49fSNick Terrell const BYTE* const prefixLowest = base + prefixLowestIndex; 872*e0c1b49fSNick Terrell 873*e0c1b49fSNick Terrell typedef size_t (*searchMax_f)( 874*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, 875*e0c1b49fSNick Terrell const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); 876*e0c1b49fSNick Terrell 877*e0c1b49fSNick Terrell /* 878*e0c1b49fSNick Terrell * This table is indexed first by the four ZSTD_dictMode_e values, and then 879*e0c1b49fSNick Terrell * by the two searchMethod_e values. NULLs are placed for configurations 880*e0c1b49fSNick Terrell * that should never occur (extDict modes go to the other implementation 881*e0c1b49fSNick Terrell * below and there is no DDSS for binary tree search yet). 882*e0c1b49fSNick Terrell */ 883*e0c1b49fSNick Terrell const searchMax_f searchFuncs[4][2] = { 884*e0c1b49fSNick Terrell { 885*e0c1b49fSNick Terrell ZSTD_HcFindBestMatch_selectMLS, 886*e0c1b49fSNick Terrell ZSTD_BtFindBestMatch_selectMLS 887*e0c1b49fSNick Terrell }, 888*e0c1b49fSNick Terrell { 889*e0c1b49fSNick Terrell NULL, 890*e0c1b49fSNick Terrell NULL 891*e0c1b49fSNick Terrell }, 892*e0c1b49fSNick Terrell { 893*e0c1b49fSNick Terrell ZSTD_HcFindBestMatch_dictMatchState_selectMLS, 894*e0c1b49fSNick Terrell ZSTD_BtFindBestMatch_dictMatchState_selectMLS 895*e0c1b49fSNick Terrell }, 896*e0c1b49fSNick Terrell { 897*e0c1b49fSNick Terrell ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, 898*e0c1b49fSNick Terrell NULL 899*e0c1b49fSNick Terrell } 900*e0c1b49fSNick Terrell }; 901*e0c1b49fSNick Terrell 902*e0c1b49fSNick Terrell searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; 903*e0c1b49fSNick Terrell U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; 904*e0c1b49fSNick Terrell 905*e0c1b49fSNick Terrell const int isDMS = dictMode == ZSTD_dictMatchState; 906*e0c1b49fSNick Terrell const int isDDS = dictMode == ZSTD_dedicatedDictSearch; 907*e0c1b49fSNick Terrell const int isDxS = isDMS || isDDS; 908*e0c1b49fSNick Terrell const ZSTD_matchState_t* const dms = ms->dictMatchState; 909*e0c1b49fSNick Terrell const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0; 910*e0c1b49fSNick Terrell const BYTE* const dictBase = isDxS ? dms->window.base : NULL; 911*e0c1b49fSNick Terrell const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL; 912*e0c1b49fSNick Terrell const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL; 913*e0c1b49fSNick Terrell const U32 dictIndexDelta = isDxS ? 914*e0c1b49fSNick Terrell prefixLowestIndex - (U32)(dictEnd - dictBase) : 915*e0c1b49fSNick Terrell 0; 916*e0c1b49fSNick Terrell const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); 917*e0c1b49fSNick Terrell 918*e0c1b49fSNick Terrell assert(searchMax != NULL); 919*e0c1b49fSNick Terrell 920*e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); 921*e0c1b49fSNick Terrell 922*e0c1b49fSNick Terrell /* init */ 923*e0c1b49fSNick Terrell ip += (dictAndPrefixLength == 0); 924*e0c1b49fSNick Terrell if (dictMode == ZSTD_noDict) { 925*e0c1b49fSNick Terrell U32 const curr = (U32)(ip - base); 926*e0c1b49fSNick Terrell U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); 927*e0c1b49fSNick Terrell U32 const maxRep = curr - windowLow; 928*e0c1b49fSNick Terrell if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; 929*e0c1b49fSNick Terrell if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; 930*e0c1b49fSNick Terrell } 931*e0c1b49fSNick Terrell if (isDxS) { 932*e0c1b49fSNick Terrell /* dictMatchState repCode checks don't currently handle repCode == 0 933*e0c1b49fSNick Terrell * disabling. */ 934*e0c1b49fSNick Terrell assert(offset_1 <= dictAndPrefixLength); 935*e0c1b49fSNick Terrell assert(offset_2 <= dictAndPrefixLength); 936*e0c1b49fSNick Terrell } 937*e0c1b49fSNick Terrell 938*e0c1b49fSNick Terrell /* Match Loop */ 939*e0c1b49fSNick Terrell #if defined(__x86_64__) 940*e0c1b49fSNick Terrell /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the 941*e0c1b49fSNick Terrell * code alignment is perturbed. To fix the instability align the loop on 32-bytes. 942*e0c1b49fSNick Terrell */ 943*e0c1b49fSNick Terrell __asm__(".p2align 5"); 944*e0c1b49fSNick Terrell #endif 945*e0c1b49fSNick Terrell while (ip < ilimit) { 946*e0c1b49fSNick Terrell size_t matchLength=0; 947*e0c1b49fSNick Terrell size_t offset=0; 948*e0c1b49fSNick Terrell const BYTE* start=ip+1; 949*e0c1b49fSNick Terrell 950*e0c1b49fSNick Terrell /* check repCode */ 951*e0c1b49fSNick Terrell if (isDxS) { 952*e0c1b49fSNick Terrell const U32 repIndex = (U32)(ip - base) + 1 - offset_1; 953*e0c1b49fSNick Terrell const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) 954*e0c1b49fSNick Terrell && repIndex < prefixLowestIndex) ? 955*e0c1b49fSNick Terrell dictBase + (repIndex - dictIndexDelta) : 956*e0c1b49fSNick Terrell base + repIndex; 957*e0c1b49fSNick Terrell if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) 958*e0c1b49fSNick Terrell && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { 959*e0c1b49fSNick Terrell const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; 960*e0c1b49fSNick Terrell matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; 961*e0c1b49fSNick Terrell if (depth==0) goto _storeSequence; 962*e0c1b49fSNick Terrell } 963*e0c1b49fSNick Terrell } 964*e0c1b49fSNick Terrell if ( dictMode == ZSTD_noDict 965*e0c1b49fSNick Terrell && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { 966*e0c1b49fSNick Terrell matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; 967*e0c1b49fSNick Terrell if (depth==0) goto _storeSequence; 968*e0c1b49fSNick Terrell } 969*e0c1b49fSNick Terrell 970*e0c1b49fSNick Terrell /* first search (depth 0) */ 971*e0c1b49fSNick Terrell { size_t offsetFound = 999999999; 972*e0c1b49fSNick Terrell size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); 973*e0c1b49fSNick Terrell if (ml2 > matchLength) 974*e0c1b49fSNick Terrell matchLength = ml2, start = ip, offset=offsetFound; 975*e0c1b49fSNick Terrell } 976*e0c1b49fSNick Terrell 977*e0c1b49fSNick Terrell if (matchLength < 4) { 978*e0c1b49fSNick Terrell ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ 979*e0c1b49fSNick Terrell continue; 980*e0c1b49fSNick Terrell } 981*e0c1b49fSNick Terrell 982*e0c1b49fSNick Terrell /* let's try to find a better solution */ 983*e0c1b49fSNick Terrell if (depth>=1) 984*e0c1b49fSNick Terrell while (ip<ilimit) { 985*e0c1b49fSNick Terrell ip ++; 986*e0c1b49fSNick Terrell if ( (dictMode == ZSTD_noDict) 987*e0c1b49fSNick Terrell && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { 988*e0c1b49fSNick Terrell size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; 989*e0c1b49fSNick Terrell int const gain2 = (int)(mlRep * 3); 990*e0c1b49fSNick Terrell int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); 991*e0c1b49fSNick Terrell if ((mlRep >= 4) && (gain2 > gain1)) 992*e0c1b49fSNick Terrell matchLength = mlRep, offset = 0, start = ip; 993*e0c1b49fSNick Terrell } 994*e0c1b49fSNick Terrell if (isDxS) { 995*e0c1b49fSNick Terrell const U32 repIndex = (U32)(ip - base) - offset_1; 996*e0c1b49fSNick Terrell const BYTE* repMatch = repIndex < prefixLowestIndex ? 997*e0c1b49fSNick Terrell dictBase + (repIndex - dictIndexDelta) : 998*e0c1b49fSNick Terrell base + repIndex; 999*e0c1b49fSNick Terrell if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) 1000*e0c1b49fSNick Terrell && (MEM_read32(repMatch) == MEM_read32(ip)) ) { 1001*e0c1b49fSNick Terrell const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; 1002*e0c1b49fSNick Terrell size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; 1003*e0c1b49fSNick Terrell int const gain2 = (int)(mlRep * 3); 1004*e0c1b49fSNick Terrell int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); 1005*e0c1b49fSNick Terrell if ((mlRep >= 4) && (gain2 > gain1)) 1006*e0c1b49fSNick Terrell matchLength = mlRep, offset = 0, start = ip; 1007*e0c1b49fSNick Terrell } 1008*e0c1b49fSNick Terrell } 1009*e0c1b49fSNick Terrell { size_t offset2=999999999; 1010*e0c1b49fSNick Terrell size_t const ml2 = searchMax(ms, ip, iend, &offset2); 1011*e0c1b49fSNick Terrell int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 1012*e0c1b49fSNick Terrell int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); 1013*e0c1b49fSNick Terrell if ((ml2 >= 4) && (gain2 > gain1)) { 1014*e0c1b49fSNick Terrell matchLength = ml2, offset = offset2, start = ip; 1015*e0c1b49fSNick Terrell continue; /* search a better one */ 1016*e0c1b49fSNick Terrell } } 1017*e0c1b49fSNick Terrell 1018*e0c1b49fSNick Terrell /* let's find an even better one */ 1019*e0c1b49fSNick Terrell if ((depth==2) && (ip<ilimit)) { 1020*e0c1b49fSNick Terrell ip ++; 1021*e0c1b49fSNick Terrell if ( (dictMode == ZSTD_noDict) 1022*e0c1b49fSNick Terrell && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { 1023*e0c1b49fSNick Terrell size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; 1024*e0c1b49fSNick Terrell int const gain2 = (int)(mlRep * 4); 1025*e0c1b49fSNick Terrell int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); 1026*e0c1b49fSNick Terrell if ((mlRep >= 4) && (gain2 > gain1)) 1027*e0c1b49fSNick Terrell matchLength = mlRep, offset = 0, start = ip; 1028*e0c1b49fSNick Terrell } 1029*e0c1b49fSNick Terrell if (isDxS) { 1030*e0c1b49fSNick Terrell const U32 repIndex = (U32)(ip - base) - offset_1; 1031*e0c1b49fSNick Terrell const BYTE* repMatch = repIndex < prefixLowestIndex ? 1032*e0c1b49fSNick Terrell dictBase + (repIndex - dictIndexDelta) : 1033*e0c1b49fSNick Terrell base + repIndex; 1034*e0c1b49fSNick Terrell if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) 1035*e0c1b49fSNick Terrell && (MEM_read32(repMatch) == MEM_read32(ip)) ) { 1036*e0c1b49fSNick Terrell const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; 1037*e0c1b49fSNick Terrell size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; 1038*e0c1b49fSNick Terrell int const gain2 = (int)(mlRep * 4); 1039*e0c1b49fSNick Terrell int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); 1040*e0c1b49fSNick Terrell if ((mlRep >= 4) && (gain2 > gain1)) 1041*e0c1b49fSNick Terrell matchLength = mlRep, offset = 0, start = ip; 1042*e0c1b49fSNick Terrell } 1043*e0c1b49fSNick Terrell } 1044*e0c1b49fSNick Terrell { size_t offset2=999999999; 1045*e0c1b49fSNick Terrell size_t const ml2 = searchMax(ms, ip, iend, &offset2); 1046*e0c1b49fSNick Terrell int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 1047*e0c1b49fSNick Terrell int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); 1048*e0c1b49fSNick Terrell if ((ml2 >= 4) && (gain2 > gain1)) { 1049*e0c1b49fSNick Terrell matchLength = ml2, offset = offset2, start = ip; 1050*e0c1b49fSNick Terrell continue; 1051*e0c1b49fSNick Terrell } } } 1052*e0c1b49fSNick Terrell break; /* nothing found : store previous solution */ 1053*e0c1b49fSNick Terrell } 1054*e0c1b49fSNick Terrell 1055*e0c1b49fSNick Terrell /* NOTE: 1056*e0c1b49fSNick Terrell * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. 1057*e0c1b49fSNick Terrell * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which 1058*e0c1b49fSNick Terrell * overflows the pointer, which is undefined behavior. 1059*e0c1b49fSNick Terrell */ 1060*e0c1b49fSNick Terrell /* catch up */ 1061*e0c1b49fSNick Terrell if (offset) { 1062*e0c1b49fSNick Terrell if (dictMode == ZSTD_noDict) { 1063*e0c1b49fSNick Terrell while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) 1064*e0c1b49fSNick Terrell && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ 1065*e0c1b49fSNick Terrell { start--; matchLength++; } 1066*e0c1b49fSNick Terrell } 1067*e0c1b49fSNick Terrell if (isDxS) { 1068*e0c1b49fSNick Terrell U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); 1069*e0c1b49fSNick Terrell const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; 1070*e0c1b49fSNick Terrell const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; 1071*e0c1b49fSNick Terrell while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ 1072*e0c1b49fSNick Terrell } 1073*e0c1b49fSNick Terrell offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); 1074*e0c1b49fSNick Terrell } 1075*e0c1b49fSNick Terrell /* store sequence */ 1076*e0c1b49fSNick Terrell _storeSequence: 1077*e0c1b49fSNick Terrell { size_t const litLength = start - anchor; 1078*e0c1b49fSNick Terrell ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); 1079*e0c1b49fSNick Terrell anchor = ip = start + matchLength; 1080*e0c1b49fSNick Terrell } 1081*e0c1b49fSNick Terrell 1082*e0c1b49fSNick Terrell /* check immediate repcode */ 1083*e0c1b49fSNick Terrell if (isDxS) { 1084*e0c1b49fSNick Terrell while (ip <= ilimit) { 1085*e0c1b49fSNick Terrell U32 const current2 = (U32)(ip-base); 1086*e0c1b49fSNick Terrell U32 const repIndex = current2 - offset_2; 1087*e0c1b49fSNick Terrell const BYTE* repMatch = repIndex < prefixLowestIndex ? 1088*e0c1b49fSNick Terrell dictBase - dictIndexDelta + repIndex : 1089*e0c1b49fSNick Terrell base + repIndex; 1090*e0c1b49fSNick Terrell if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) 1091*e0c1b49fSNick Terrell && (MEM_read32(repMatch) == MEM_read32(ip)) ) { 1092*e0c1b49fSNick Terrell const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; 1093*e0c1b49fSNick Terrell matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; 1094*e0c1b49fSNick Terrell offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ 1095*e0c1b49fSNick Terrell ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); 1096*e0c1b49fSNick Terrell ip += matchLength; 1097*e0c1b49fSNick Terrell anchor = ip; 1098*e0c1b49fSNick Terrell continue; 1099*e0c1b49fSNick Terrell } 1100*e0c1b49fSNick Terrell break; 1101*e0c1b49fSNick Terrell } 1102*e0c1b49fSNick Terrell } 1103*e0c1b49fSNick Terrell 1104*e0c1b49fSNick Terrell if (dictMode == ZSTD_noDict) { 1105*e0c1b49fSNick Terrell while ( ((ip <= ilimit) & (offset_2>0)) 1106*e0c1b49fSNick Terrell && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { 1107*e0c1b49fSNick Terrell /* store sequence */ 1108*e0c1b49fSNick Terrell matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; 1109*e0c1b49fSNick Terrell offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ 1110*e0c1b49fSNick Terrell ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); 1111*e0c1b49fSNick Terrell ip += matchLength; 1112*e0c1b49fSNick Terrell anchor = ip; 1113*e0c1b49fSNick Terrell continue; /* faster when present ... (?) */ 1114*e0c1b49fSNick Terrell } } } 1115*e0c1b49fSNick Terrell 1116*e0c1b49fSNick Terrell /* Save reps for next block */ 1117*e0c1b49fSNick Terrell rep[0] = offset_1 ? offset_1 : savedOffset; 1118*e0c1b49fSNick Terrell rep[1] = offset_2 ? offset_2 : savedOffset; 1119*e0c1b49fSNick Terrell 1120*e0c1b49fSNick Terrell /* Return the last literals size */ 1121*e0c1b49fSNick Terrell return (size_t)(iend - anchor); 1122*e0c1b49fSNick Terrell } 1123*e0c1b49fSNick Terrell 1124*e0c1b49fSNick Terrell 1125*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_btlazy2( 1126*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1127*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1128*e0c1b49fSNick Terrell { 1129*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); 1130*e0c1b49fSNick Terrell } 1131*e0c1b49fSNick Terrell 1132*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_lazy2( 1133*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1134*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1135*e0c1b49fSNick Terrell { 1136*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); 1137*e0c1b49fSNick Terrell } 1138*e0c1b49fSNick Terrell 1139*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_lazy( 1140*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1141*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1142*e0c1b49fSNick Terrell { 1143*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); 1144*e0c1b49fSNick Terrell } 1145*e0c1b49fSNick Terrell 1146*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_greedy( 1147*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1148*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1149*e0c1b49fSNick Terrell { 1150*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); 1151*e0c1b49fSNick Terrell } 1152*e0c1b49fSNick Terrell 1153*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_btlazy2_dictMatchState( 1154*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1155*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1156*e0c1b49fSNick Terrell { 1157*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); 1158*e0c1b49fSNick Terrell } 1159*e0c1b49fSNick Terrell 1160*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_lazy2_dictMatchState( 1161*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1162*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1163*e0c1b49fSNick Terrell { 1164*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); 1165*e0c1b49fSNick Terrell } 1166*e0c1b49fSNick Terrell 1167*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_lazy_dictMatchState( 1168*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1169*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1170*e0c1b49fSNick Terrell { 1171*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); 1172*e0c1b49fSNick Terrell } 1173*e0c1b49fSNick Terrell 1174*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_greedy_dictMatchState( 1175*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1176*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1177*e0c1b49fSNick Terrell { 1178*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); 1179*e0c1b49fSNick Terrell } 1180*e0c1b49fSNick Terrell 1181*e0c1b49fSNick Terrell 1182*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( 1183*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1184*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1185*e0c1b49fSNick Terrell { 1186*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); 1187*e0c1b49fSNick Terrell } 1188*e0c1b49fSNick Terrell 1189*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( 1190*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1191*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1192*e0c1b49fSNick Terrell { 1193*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); 1194*e0c1b49fSNick Terrell } 1195*e0c1b49fSNick Terrell 1196*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( 1197*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1198*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1199*e0c1b49fSNick Terrell { 1200*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); 1201*e0c1b49fSNick Terrell } 1202*e0c1b49fSNick Terrell 1203*e0c1b49fSNick Terrell 1204*e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE 1205*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_lazy_extDict_generic( 1206*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, 1207*e0c1b49fSNick Terrell U32 rep[ZSTD_REP_NUM], 1208*e0c1b49fSNick Terrell const void* src, size_t srcSize, 1209*e0c1b49fSNick Terrell const searchMethod_e searchMethod, const U32 depth) 1210*e0c1b49fSNick Terrell { 1211*e0c1b49fSNick Terrell const BYTE* const istart = (const BYTE*)src; 1212*e0c1b49fSNick Terrell const BYTE* ip = istart; 1213*e0c1b49fSNick Terrell const BYTE* anchor = istart; 1214*e0c1b49fSNick Terrell const BYTE* const iend = istart + srcSize; 1215*e0c1b49fSNick Terrell const BYTE* const ilimit = iend - 8; 1216*e0c1b49fSNick Terrell const BYTE* const base = ms->window.base; 1217*e0c1b49fSNick Terrell const U32 dictLimit = ms->window.dictLimit; 1218*e0c1b49fSNick Terrell const BYTE* const prefixStart = base + dictLimit; 1219*e0c1b49fSNick Terrell const BYTE* const dictBase = ms->window.dictBase; 1220*e0c1b49fSNick Terrell const BYTE* const dictEnd = dictBase + dictLimit; 1221*e0c1b49fSNick Terrell const BYTE* const dictStart = dictBase + ms->window.lowLimit; 1222*e0c1b49fSNick Terrell const U32 windowLog = ms->cParams.windowLog; 1223*e0c1b49fSNick Terrell 1224*e0c1b49fSNick Terrell typedef size_t (*searchMax_f)( 1225*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, 1226*e0c1b49fSNick Terrell const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); 1227*e0c1b49fSNick Terrell searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; 1228*e0c1b49fSNick Terrell 1229*e0c1b49fSNick Terrell U32 offset_1 = rep[0], offset_2 = rep[1]; 1230*e0c1b49fSNick Terrell 1231*e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); 1232*e0c1b49fSNick Terrell 1233*e0c1b49fSNick Terrell /* init */ 1234*e0c1b49fSNick Terrell ip += (ip == prefixStart); 1235*e0c1b49fSNick Terrell 1236*e0c1b49fSNick Terrell /* Match Loop */ 1237*e0c1b49fSNick Terrell #if defined(__x86_64__) 1238*e0c1b49fSNick Terrell /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the 1239*e0c1b49fSNick Terrell * code alignment is perturbed. To fix the instability align the loop on 32-bytes. 1240*e0c1b49fSNick Terrell */ 1241*e0c1b49fSNick Terrell __asm__(".p2align 5"); 1242*e0c1b49fSNick Terrell #endif 1243*e0c1b49fSNick Terrell while (ip < ilimit) { 1244*e0c1b49fSNick Terrell size_t matchLength=0; 1245*e0c1b49fSNick Terrell size_t offset=0; 1246*e0c1b49fSNick Terrell const BYTE* start=ip+1; 1247*e0c1b49fSNick Terrell U32 curr = (U32)(ip-base); 1248*e0c1b49fSNick Terrell 1249*e0c1b49fSNick Terrell /* check repCode */ 1250*e0c1b49fSNick Terrell { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog); 1251*e0c1b49fSNick Terrell const U32 repIndex = (U32)(curr+1 - offset_1); 1252*e0c1b49fSNick Terrell const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 1253*e0c1b49fSNick Terrell const BYTE* const repMatch = repBase + repIndex; 1254*e0c1b49fSNick Terrell if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ 1255*e0c1b49fSNick Terrell if (MEM_read32(ip+1) == MEM_read32(repMatch)) { 1256*e0c1b49fSNick Terrell /* repcode detected we should take it */ 1257*e0c1b49fSNick Terrell const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 1258*e0c1b49fSNick Terrell matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; 1259*e0c1b49fSNick Terrell if (depth==0) goto _storeSequence; 1260*e0c1b49fSNick Terrell } } 1261*e0c1b49fSNick Terrell 1262*e0c1b49fSNick Terrell /* first search (depth 0) */ 1263*e0c1b49fSNick Terrell { size_t offsetFound = 999999999; 1264*e0c1b49fSNick Terrell size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); 1265*e0c1b49fSNick Terrell if (ml2 > matchLength) 1266*e0c1b49fSNick Terrell matchLength = ml2, start = ip, offset=offsetFound; 1267*e0c1b49fSNick Terrell } 1268*e0c1b49fSNick Terrell 1269*e0c1b49fSNick Terrell if (matchLength < 4) { 1270*e0c1b49fSNick Terrell ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ 1271*e0c1b49fSNick Terrell continue; 1272*e0c1b49fSNick Terrell } 1273*e0c1b49fSNick Terrell 1274*e0c1b49fSNick Terrell /* let's try to find a better solution */ 1275*e0c1b49fSNick Terrell if (depth>=1) 1276*e0c1b49fSNick Terrell while (ip<ilimit) { 1277*e0c1b49fSNick Terrell ip ++; 1278*e0c1b49fSNick Terrell curr++; 1279*e0c1b49fSNick Terrell /* check repCode */ 1280*e0c1b49fSNick Terrell if (offset) { 1281*e0c1b49fSNick Terrell const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); 1282*e0c1b49fSNick Terrell const U32 repIndex = (U32)(curr - offset_1); 1283*e0c1b49fSNick Terrell const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 1284*e0c1b49fSNick Terrell const BYTE* const repMatch = repBase + repIndex; 1285*e0c1b49fSNick Terrell if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ 1286*e0c1b49fSNick Terrell if (MEM_read32(ip) == MEM_read32(repMatch)) { 1287*e0c1b49fSNick Terrell /* repcode detected */ 1288*e0c1b49fSNick Terrell const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 1289*e0c1b49fSNick Terrell size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 1290*e0c1b49fSNick Terrell int const gain2 = (int)(repLength * 3); 1291*e0c1b49fSNick Terrell int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); 1292*e0c1b49fSNick Terrell if ((repLength >= 4) && (gain2 > gain1)) 1293*e0c1b49fSNick Terrell matchLength = repLength, offset = 0, start = ip; 1294*e0c1b49fSNick Terrell } } 1295*e0c1b49fSNick Terrell 1296*e0c1b49fSNick Terrell /* search match, depth 1 */ 1297*e0c1b49fSNick Terrell { size_t offset2=999999999; 1298*e0c1b49fSNick Terrell size_t const ml2 = searchMax(ms, ip, iend, &offset2); 1299*e0c1b49fSNick Terrell int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 1300*e0c1b49fSNick Terrell int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); 1301*e0c1b49fSNick Terrell if ((ml2 >= 4) && (gain2 > gain1)) { 1302*e0c1b49fSNick Terrell matchLength = ml2, offset = offset2, start = ip; 1303*e0c1b49fSNick Terrell continue; /* search a better one */ 1304*e0c1b49fSNick Terrell } } 1305*e0c1b49fSNick Terrell 1306*e0c1b49fSNick Terrell /* let's find an even better one */ 1307*e0c1b49fSNick Terrell if ((depth==2) && (ip<ilimit)) { 1308*e0c1b49fSNick Terrell ip ++; 1309*e0c1b49fSNick Terrell curr++; 1310*e0c1b49fSNick Terrell /* check repCode */ 1311*e0c1b49fSNick Terrell if (offset) { 1312*e0c1b49fSNick Terrell const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); 1313*e0c1b49fSNick Terrell const U32 repIndex = (U32)(curr - offset_1); 1314*e0c1b49fSNick Terrell const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 1315*e0c1b49fSNick Terrell const BYTE* const repMatch = repBase + repIndex; 1316*e0c1b49fSNick Terrell if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ 1317*e0c1b49fSNick Terrell if (MEM_read32(ip) == MEM_read32(repMatch)) { 1318*e0c1b49fSNick Terrell /* repcode detected */ 1319*e0c1b49fSNick Terrell const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 1320*e0c1b49fSNick Terrell size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 1321*e0c1b49fSNick Terrell int const gain2 = (int)(repLength * 4); 1322*e0c1b49fSNick Terrell int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); 1323*e0c1b49fSNick Terrell if ((repLength >= 4) && (gain2 > gain1)) 1324*e0c1b49fSNick Terrell matchLength = repLength, offset = 0, start = ip; 1325*e0c1b49fSNick Terrell } } 1326*e0c1b49fSNick Terrell 1327*e0c1b49fSNick Terrell /* search match, depth 2 */ 1328*e0c1b49fSNick Terrell { size_t offset2=999999999; 1329*e0c1b49fSNick Terrell size_t const ml2 = searchMax(ms, ip, iend, &offset2); 1330*e0c1b49fSNick Terrell int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 1331*e0c1b49fSNick Terrell int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); 1332*e0c1b49fSNick Terrell if ((ml2 >= 4) && (gain2 > gain1)) { 1333*e0c1b49fSNick Terrell matchLength = ml2, offset = offset2, start = ip; 1334*e0c1b49fSNick Terrell continue; 1335*e0c1b49fSNick Terrell } } } 1336*e0c1b49fSNick Terrell break; /* nothing found : store previous solution */ 1337*e0c1b49fSNick Terrell } 1338*e0c1b49fSNick Terrell 1339*e0c1b49fSNick Terrell /* catch up */ 1340*e0c1b49fSNick Terrell if (offset) { 1341*e0c1b49fSNick Terrell U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); 1342*e0c1b49fSNick Terrell const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; 1343*e0c1b49fSNick Terrell const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; 1344*e0c1b49fSNick Terrell while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ 1345*e0c1b49fSNick Terrell offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); 1346*e0c1b49fSNick Terrell } 1347*e0c1b49fSNick Terrell 1348*e0c1b49fSNick Terrell /* store sequence */ 1349*e0c1b49fSNick Terrell _storeSequence: 1350*e0c1b49fSNick Terrell { size_t const litLength = start - anchor; 1351*e0c1b49fSNick Terrell ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); 1352*e0c1b49fSNick Terrell anchor = ip = start + matchLength; 1353*e0c1b49fSNick Terrell } 1354*e0c1b49fSNick Terrell 1355*e0c1b49fSNick Terrell /* check immediate repcode */ 1356*e0c1b49fSNick Terrell while (ip <= ilimit) { 1357*e0c1b49fSNick Terrell const U32 repCurrent = (U32)(ip-base); 1358*e0c1b49fSNick Terrell const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); 1359*e0c1b49fSNick Terrell const U32 repIndex = repCurrent - offset_2; 1360*e0c1b49fSNick Terrell const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 1361*e0c1b49fSNick Terrell const BYTE* const repMatch = repBase + repIndex; 1362*e0c1b49fSNick Terrell if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ 1363*e0c1b49fSNick Terrell if (MEM_read32(ip) == MEM_read32(repMatch)) { 1364*e0c1b49fSNick Terrell /* repcode detected we should take it */ 1365*e0c1b49fSNick Terrell const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 1366*e0c1b49fSNick Terrell matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 1367*e0c1b49fSNick Terrell offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ 1368*e0c1b49fSNick Terrell ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); 1369*e0c1b49fSNick Terrell ip += matchLength; 1370*e0c1b49fSNick Terrell anchor = ip; 1371*e0c1b49fSNick Terrell continue; /* faster when present ... (?) */ 1372*e0c1b49fSNick Terrell } 1373*e0c1b49fSNick Terrell break; 1374*e0c1b49fSNick Terrell } } 1375*e0c1b49fSNick Terrell 1376*e0c1b49fSNick Terrell /* Save reps for next block */ 1377*e0c1b49fSNick Terrell rep[0] = offset_1; 1378*e0c1b49fSNick Terrell rep[1] = offset_2; 1379*e0c1b49fSNick Terrell 1380*e0c1b49fSNick Terrell /* Return the last literals size */ 1381*e0c1b49fSNick Terrell return (size_t)(iend - anchor); 1382*e0c1b49fSNick Terrell } 1383*e0c1b49fSNick Terrell 1384*e0c1b49fSNick Terrell 1385*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_greedy_extDict( 1386*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1387*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1388*e0c1b49fSNick Terrell { 1389*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); 1390*e0c1b49fSNick Terrell } 1391*e0c1b49fSNick Terrell 1392*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_lazy_extDict( 1393*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1394*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1395*e0c1b49fSNick Terrell 1396*e0c1b49fSNick Terrell { 1397*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); 1398*e0c1b49fSNick Terrell } 1399*e0c1b49fSNick Terrell 1400*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_lazy2_extDict( 1401*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1402*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1403*e0c1b49fSNick Terrell 1404*e0c1b49fSNick Terrell { 1405*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); 1406*e0c1b49fSNick Terrell } 1407*e0c1b49fSNick Terrell 1408*e0c1b49fSNick Terrell size_t ZSTD_compressBlock_btlazy2_extDict( 1409*e0c1b49fSNick Terrell ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1410*e0c1b49fSNick Terrell void const* src, size_t srcSize) 1411*e0c1b49fSNick Terrell 1412*e0c1b49fSNick Terrell { 1413*e0c1b49fSNick Terrell return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); 1414*e0c1b49fSNick Terrell } 1415