1 /* 2 * Copyright (c) Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 #include "zstd_compress_internal.h" 12 #include "zstd_double_fast.h" 13 14 15 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, 16 void const* end, ZSTD_dictTableLoadMethod_e dtlm) 17 { 18 const ZSTD_compressionParameters* const cParams = &ms->cParams; 19 U32* const hashLarge = ms->hashTable; 20 U32 const hBitsL = cParams->hashLog; 21 U32 const mls = cParams->minMatch; 22 U32* const hashSmall = ms->chainTable; 23 U32 const hBitsS = cParams->chainLog; 24 const BYTE* const base = ms->window.base; 25 const BYTE* ip = base + ms->nextToUpdate; 26 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; 27 const U32 fastHashFillStep = 3; 28 29 /* Always insert every fastHashFillStep position into the hash tables. 30 * Insert the other positions into the large hash table if their entry 31 * is empty. 32 */ 33 for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { 34 U32 const curr = (U32)(ip - base); 35 U32 i; 36 for (i = 0; i < fastHashFillStep; ++i) { 37 size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); 38 size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); 39 if (i == 0) 40 hashSmall[smHash] = curr + i; 41 if (i == 0 || hashLarge[lgHash] == 0) 42 hashLarge[lgHash] = curr + i; 43 /* Only load extra positions for ZSTD_dtlm_full */ 44 if (dtlm == ZSTD_dtlm_fast) 45 break; 46 } } 47 } 48 49 50 FORCE_INLINE_TEMPLATE 51 size_t ZSTD_compressBlock_doubleFast_noDict_generic( 52 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 53 void const* src, size_t srcSize, U32 const mls /* template */) 54 { 55 ZSTD_compressionParameters const* cParams = &ms->cParams; 56 U32* const hashLong = ms->hashTable; 57 const U32 hBitsL = cParams->hashLog; 58 U32* const hashSmall = ms->chainTable; 59 const U32 hBitsS = cParams->chainLog; 60 const BYTE* const base = ms->window.base; 61 const BYTE* const istart = (const BYTE*)src; 62 const BYTE* anchor = istart; 63 const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); 64 /* presumes that, if there is a dictionary, it must be using Attach mode */ 65 const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); 66 const BYTE* const prefixLowest = base + prefixLowestIndex; 67 const BYTE* const iend = istart + srcSize; 68 const BYTE* const ilimit = iend - HASH_READ_SIZE; 69 U32 offset_1=rep[0], offset_2=rep[1]; 70 U32 offsetSaved = 0; 71 72 size_t mLength; 73 U32 offset; 74 U32 curr; 75 76 /* how many positions to search before increasing step size */ 77 const size_t kStepIncr = 1 << kSearchStrength; 78 /* the position at which to increment the step size if no match is found */ 79 const BYTE* nextStep; 80 size_t step; /* the current step size */ 81 82 size_t hl0; /* the long hash at ip */ 83 size_t hl1; /* the long hash at ip1 */ 84 85 U32 idxl0; /* the long match index for ip */ 86 U32 idxl1; /* the long match index for ip1 */ 87 88 const BYTE* matchl0; /* the long match for ip */ 89 const BYTE* matchs0; /* the short match for ip */ 90 const BYTE* matchl1; /* the long match for ip1 */ 91 92 const BYTE* ip = istart; /* the current position */ 93 const BYTE* ip1; /* the next position */ 94 95 DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic"); 96 97 /* init */ 98 ip += ((ip - prefixLowest) == 0); 99 { 100 U32 const current = (U32)(ip - base); 101 U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); 102 U32 const maxRep = current - windowLow; 103 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; 104 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; 105 } 106 107 /* Outer Loop: one iteration per match found and stored */ 108 while (1) { 109 step = 1; 110 nextStep = ip + kStepIncr; 111 ip1 = ip + step; 112 113 if (ip1 > ilimit) { 114 goto _cleanup; 115 } 116 117 hl0 = ZSTD_hashPtr(ip, hBitsL, 8); 118 idxl0 = hashLong[hl0]; 119 matchl0 = base + idxl0; 120 121 /* Inner Loop: one iteration per search / position */ 122 do { 123 const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls); 124 const U32 idxs0 = hashSmall[hs0]; 125 curr = (U32)(ip-base); 126 matchs0 = base + idxs0; 127 128 hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ 129 130 /* check noDict repcode */ 131 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { 132 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; 133 ip++; 134 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); 135 goto _match_stored; 136 } 137 138 hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); 139 140 if (idxl0 > prefixLowestIndex) { 141 /* check prefix long match */ 142 if (MEM_read64(matchl0) == MEM_read64(ip)) { 143 mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; 144 offset = (U32)(ip-matchl0); 145 while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ 146 goto _match_found; 147 } 148 } 149 150 idxl1 = hashLong[hl1]; 151 matchl1 = base + idxl1; 152 153 if (idxs0 > prefixLowestIndex) { 154 /* check prefix short match */ 155 if (MEM_read32(matchs0) == MEM_read32(ip)) { 156 goto _search_next_long; 157 } 158 } 159 160 if (ip1 >= nextStep) { 161 PREFETCH_L1(ip1 + 64); 162 PREFETCH_L1(ip1 + 128); 163 step++; 164 nextStep += kStepIncr; 165 } 166 ip = ip1; 167 ip1 += step; 168 169 hl0 = hl1; 170 idxl0 = idxl1; 171 matchl0 = matchl1; 172 #if defined(__aarch64__) 173 PREFETCH_L1(ip+256); 174 #endif 175 } while (ip1 <= ilimit); 176 177 _cleanup: 178 /* save reps for next block */ 179 rep[0] = offset_1 ? offset_1 : offsetSaved; 180 rep[1] = offset_2 ? offset_2 : offsetSaved; 181 182 /* Return the last literals size */ 183 return (size_t)(iend - anchor); 184 185 _search_next_long: 186 187 /* check prefix long +1 match */ 188 if (idxl1 > prefixLowestIndex) { 189 if (MEM_read64(matchl1) == MEM_read64(ip1)) { 190 ip = ip1; 191 mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; 192 offset = (U32)(ip-matchl1); 193 while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ 194 goto _match_found; 195 } 196 } 197 198 /* if no long +1 match, explore the short match we found */ 199 mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; 200 offset = (U32)(ip - matchs0); 201 while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ 202 203 /* fall-through */ 204 205 _match_found: /* requires ip, offset, mLength */ 206 offset_2 = offset_1; 207 offset_1 = offset; 208 209 if (step < 4) { 210 /* It is unsafe to write this value back to the hashtable when ip1 is 211 * greater than or equal to the new ip we will have after we're done 212 * processing this match. Rather than perform that test directly 213 * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler 214 * more predictable test. The minmatch even if we take a short match is 215 * 4 bytes, so as long as step, the distance between ip and ip1 216 * (initially) is less than 4, we know ip1 < new ip. */ 217 hashLong[hl1] = (U32)(ip1 - base); 218 } 219 220 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); 221 222 _match_stored: 223 /* match found */ 224 ip += mLength; 225 anchor = ip; 226 227 if (ip <= ilimit) { 228 /* Complementary insertion */ 229 /* done after iLimit test, as candidates could be > iend-8 */ 230 { U32 const indexToInsert = curr+2; 231 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; 232 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); 233 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; 234 hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); 235 } 236 237 /* check immediate repcode */ 238 while ( (ip <= ilimit) 239 && ( (offset_2>0) 240 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { 241 /* store sequence */ 242 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; 243 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ 244 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); 245 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); 246 ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength); 247 ip += rLength; 248 anchor = ip; 249 continue; /* faster when present ... (?) */ 250 } 251 } 252 } 253 } 254 255 256 FORCE_INLINE_TEMPLATE 257 size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( 258 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 259 void const* src, size_t srcSize, 260 U32 const mls /* template */) 261 { 262 ZSTD_compressionParameters const* cParams = &ms->cParams; 263 U32* const hashLong = ms->hashTable; 264 const U32 hBitsL = cParams->hashLog; 265 U32* const hashSmall = ms->chainTable; 266 const U32 hBitsS = cParams->chainLog; 267 const BYTE* const base = ms->window.base; 268 const BYTE* const istart = (const BYTE*)src; 269 const BYTE* ip = istart; 270 const BYTE* anchor = istart; 271 const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); 272 /* presumes that, if there is a dictionary, it must be using Attach mode */ 273 const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); 274 const BYTE* const prefixLowest = base + prefixLowestIndex; 275 const BYTE* const iend = istart + srcSize; 276 const BYTE* const ilimit = iend - HASH_READ_SIZE; 277 U32 offset_1=rep[0], offset_2=rep[1]; 278 U32 offsetSaved = 0; 279 280 const ZSTD_matchState_t* const dms = ms->dictMatchState; 281 const ZSTD_compressionParameters* const dictCParams = &dms->cParams; 282 const U32* const dictHashLong = dms->hashTable; 283 const U32* const dictHashSmall = dms->chainTable; 284 const U32 dictStartIndex = dms->window.dictLimit; 285 const BYTE* const dictBase = dms->window.base; 286 const BYTE* const dictStart = dictBase + dictStartIndex; 287 const BYTE* const dictEnd = dms->window.nextSrc; 288 const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); 289 const U32 dictHBitsL = dictCParams->hashLog; 290 const U32 dictHBitsS = dictCParams->chainLog; 291 const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); 292 293 DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); 294 295 /* if a dictionary is attached, it must be within window range */ 296 assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); 297 298 /* init */ 299 ip += (dictAndPrefixLength == 0); 300 301 /* dictMatchState repCode checks don't currently handle repCode == 0 302 * disabling. */ 303 assert(offset_1 <= dictAndPrefixLength); 304 assert(offset_2 <= dictAndPrefixLength); 305 306 /* Main Search Loop */ 307 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ 308 size_t mLength; 309 U32 offset; 310 size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); 311 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); 312 size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); 313 size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); 314 U32 const curr = (U32)(ip-base); 315 U32 const matchIndexL = hashLong[h2]; 316 U32 matchIndexS = hashSmall[h]; 317 const BYTE* matchLong = base + matchIndexL; 318 const BYTE* match = base + matchIndexS; 319 const U32 repIndex = curr + 1 - offset_1; 320 const BYTE* repMatch = (repIndex < prefixLowestIndex) ? 321 dictBase + (repIndex - dictIndexDelta) : 322 base + repIndex; 323 hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ 324 325 /* check repcode */ 326 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) 327 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { 328 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; 329 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; 330 ip++; 331 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); 332 goto _match_stored; 333 } 334 335 if (matchIndexL > prefixLowestIndex) { 336 /* check prefix long match */ 337 if (MEM_read64(matchLong) == MEM_read64(ip)) { 338 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; 339 offset = (U32)(ip-matchLong); 340 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ 341 goto _match_found; 342 } 343 } else { 344 /* check dictMatchState long match */ 345 U32 const dictMatchIndexL = dictHashLong[dictHL]; 346 const BYTE* dictMatchL = dictBase + dictMatchIndexL; 347 assert(dictMatchL < dictEnd); 348 349 if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { 350 mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; 351 offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); 352 while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ 353 goto _match_found; 354 } } 355 356 if (matchIndexS > prefixLowestIndex) { 357 /* check prefix short match */ 358 if (MEM_read32(match) == MEM_read32(ip)) { 359 goto _search_next_long; 360 } 361 } else { 362 /* check dictMatchState short match */ 363 U32 const dictMatchIndexS = dictHashSmall[dictHS]; 364 match = dictBase + dictMatchIndexS; 365 matchIndexS = dictMatchIndexS + dictIndexDelta; 366 367 if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { 368 goto _search_next_long; 369 } } 370 371 ip += ((ip-anchor) >> kSearchStrength) + 1; 372 #if defined(__aarch64__) 373 PREFETCH_L1(ip+256); 374 #endif 375 continue; 376 377 _search_next_long: 378 379 { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); 380 size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); 381 U32 const matchIndexL3 = hashLong[hl3]; 382 const BYTE* matchL3 = base + matchIndexL3; 383 hashLong[hl3] = curr + 1; 384 385 /* check prefix long +1 match */ 386 if (matchIndexL3 > prefixLowestIndex) { 387 if (MEM_read64(matchL3) == MEM_read64(ip+1)) { 388 mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; 389 ip++; 390 offset = (U32)(ip-matchL3); 391 while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ 392 goto _match_found; 393 } 394 } else { 395 /* check dict long +1 match */ 396 U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; 397 const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; 398 assert(dictMatchL3 < dictEnd); 399 if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { 400 mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; 401 ip++; 402 offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); 403 while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ 404 goto _match_found; 405 } } } 406 407 /* if no long +1 match, explore the short match we found */ 408 if (matchIndexS < prefixLowestIndex) { 409 mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; 410 offset = (U32)(curr - matchIndexS); 411 while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ 412 } else { 413 mLength = ZSTD_count(ip+4, match+4, iend) + 4; 414 offset = (U32)(ip - match); 415 while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ 416 } 417 418 _match_found: 419 offset_2 = offset_1; 420 offset_1 = offset; 421 422 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); 423 424 _match_stored: 425 /* match found */ 426 ip += mLength; 427 anchor = ip; 428 429 if (ip <= ilimit) { 430 /* Complementary insertion */ 431 /* done after iLimit test, as candidates could be > iend-8 */ 432 { U32 const indexToInsert = curr+2; 433 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; 434 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); 435 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; 436 hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); 437 } 438 439 /* check immediate repcode */ 440 while (ip <= ilimit) { 441 U32 const current2 = (U32)(ip-base); 442 U32 const repIndex2 = current2 - offset_2; 443 const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? 444 dictBase + repIndex2 - dictIndexDelta : 445 base + repIndex2; 446 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) 447 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { 448 const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; 449 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; 450 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ 451 ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); 452 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; 453 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; 454 ip += repLength2; 455 anchor = ip; 456 continue; 457 } 458 break; 459 } 460 } 461 } /* while (ip < ilimit) */ 462 463 /* save reps for next block */ 464 rep[0] = offset_1 ? offset_1 : offsetSaved; 465 rep[1] = offset_2 ? offset_2 : offsetSaved; 466 467 /* Return the last literals size */ 468 return (size_t)(iend - anchor); 469 } 470 471 #define ZSTD_GEN_DFAST_FN(dictMode, mls) \ 472 static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \ 473 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ 474 void const* src, size_t srcSize) \ 475 { \ 476 return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ 477 } 478 479 ZSTD_GEN_DFAST_FN(noDict, 4) 480 ZSTD_GEN_DFAST_FN(noDict, 5) 481 ZSTD_GEN_DFAST_FN(noDict, 6) 482 ZSTD_GEN_DFAST_FN(noDict, 7) 483 484 ZSTD_GEN_DFAST_FN(dictMatchState, 4) 485 ZSTD_GEN_DFAST_FN(dictMatchState, 5) 486 ZSTD_GEN_DFAST_FN(dictMatchState, 6) 487 ZSTD_GEN_DFAST_FN(dictMatchState, 7) 488 489 490 size_t ZSTD_compressBlock_doubleFast( 491 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 492 void const* src, size_t srcSize) 493 { 494 const U32 mls = ms->cParams.minMatch; 495 switch(mls) 496 { 497 default: /* includes case 3 */ 498 case 4 : 499 return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize); 500 case 5 : 501 return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize); 502 case 6 : 503 return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize); 504 case 7 : 505 return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize); 506 } 507 } 508 509 510 size_t ZSTD_compressBlock_doubleFast_dictMatchState( 511 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 512 void const* src, size_t srcSize) 513 { 514 const U32 mls = ms->cParams.minMatch; 515 switch(mls) 516 { 517 default: /* includes case 3 */ 518 case 4 : 519 return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize); 520 case 5 : 521 return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize); 522 case 6 : 523 return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize); 524 case 7 : 525 return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize); 526 } 527 } 528 529 530 static size_t ZSTD_compressBlock_doubleFast_extDict_generic( 531 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 532 void const* src, size_t srcSize, 533 U32 const mls /* template */) 534 { 535 ZSTD_compressionParameters const* cParams = &ms->cParams; 536 U32* const hashLong = ms->hashTable; 537 U32 const hBitsL = cParams->hashLog; 538 U32* const hashSmall = ms->chainTable; 539 U32 const hBitsS = cParams->chainLog; 540 const BYTE* const istart = (const BYTE*)src; 541 const BYTE* ip = istart; 542 const BYTE* anchor = istart; 543 const BYTE* const iend = istart + srcSize; 544 const BYTE* const ilimit = iend - 8; 545 const BYTE* const base = ms->window.base; 546 const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); 547 const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); 548 const U32 dictStartIndex = lowLimit; 549 const U32 dictLimit = ms->window.dictLimit; 550 const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; 551 const BYTE* const prefixStart = base + prefixStartIndex; 552 const BYTE* const dictBase = ms->window.dictBase; 553 const BYTE* const dictStart = dictBase + dictStartIndex; 554 const BYTE* const dictEnd = dictBase + prefixStartIndex; 555 U32 offset_1=rep[0], offset_2=rep[1]; 556 557 DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); 558 559 /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ 560 if (prefixStartIndex == dictStartIndex) 561 return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize); 562 563 /* Search Loop */ 564 while (ip < ilimit) { /* < instead of <=, because (ip+1) */ 565 const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); 566 const U32 matchIndex = hashSmall[hSmall]; 567 const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; 568 const BYTE* match = matchBase + matchIndex; 569 570 const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); 571 const U32 matchLongIndex = hashLong[hLong]; 572 const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; 573 const BYTE* matchLong = matchLongBase + matchLongIndex; 574 575 const U32 curr = (U32)(ip-base); 576 const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ 577 const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; 578 const BYTE* const repMatch = repBase + repIndex; 579 size_t mLength; 580 hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ 581 582 if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ 583 & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ 584 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { 585 const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; 586 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; 587 ip++; 588 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); 589 } else { 590 if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { 591 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; 592 const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; 593 U32 offset; 594 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; 595 offset = curr - matchLongIndex; 596 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ 597 offset_2 = offset_1; 598 offset_1 = offset; 599 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); 600 601 } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { 602 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); 603 U32 const matchIndex3 = hashLong[h3]; 604 const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; 605 const BYTE* match3 = match3Base + matchIndex3; 606 U32 offset; 607 hashLong[h3] = curr + 1; 608 if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { 609 const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; 610 const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; 611 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; 612 ip++; 613 offset = curr+1 - matchIndex3; 614 while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ 615 } else { 616 const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; 617 const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; 618 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; 619 offset = curr - matchIndex; 620 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ 621 } 622 offset_2 = offset_1; 623 offset_1 = offset; 624 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); 625 626 } else { 627 ip += ((ip-anchor) >> kSearchStrength) + 1; 628 continue; 629 } } 630 631 /* move to next sequence start */ 632 ip += mLength; 633 anchor = ip; 634 635 if (ip <= ilimit) { 636 /* Complementary insertion */ 637 /* done after iLimit test, as candidates could be > iend-8 */ 638 { U32 const indexToInsert = curr+2; 639 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; 640 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); 641 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; 642 hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); 643 } 644 645 /* check immediate repcode */ 646 while (ip <= ilimit) { 647 U32 const current2 = (U32)(ip-base); 648 U32 const repIndex2 = current2 - offset_2; 649 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; 650 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ 651 & (offset_2 <= current2 - dictStartIndex)) 652 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { 653 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; 654 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; 655 U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ 656 ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); 657 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; 658 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; 659 ip += repLength2; 660 anchor = ip; 661 continue; 662 } 663 break; 664 } } } 665 666 /* save reps for next block */ 667 rep[0] = offset_1; 668 rep[1] = offset_2; 669 670 /* Return the last literals size */ 671 return (size_t)(iend - anchor); 672 } 673 674 ZSTD_GEN_DFAST_FN(extDict, 4) 675 ZSTD_GEN_DFAST_FN(extDict, 5) 676 ZSTD_GEN_DFAST_FN(extDict, 6) 677 ZSTD_GEN_DFAST_FN(extDict, 7) 678 679 size_t ZSTD_compressBlock_doubleFast_extDict( 680 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 681 void const* src, size_t srcSize) 682 { 683 U32 const mls = ms->cParams.minMatch; 684 switch(mls) 685 { 686 default: /* includes case 3 */ 687 case 4 : 688 return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize); 689 case 5 : 690 return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize); 691 case 6 : 692 return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize); 693 case 7 : 694 return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); 695 } 696 } 697