xref: /openbmc/linux/lib/zstd/compress/hist.c (revision e0c1b49f)
1*e0c1b49fSNick Terrell /* ******************************************************************
2*e0c1b49fSNick Terrell  * hist : Histogram functions
3*e0c1b49fSNick Terrell  * part of Finite State Entropy project
4*e0c1b49fSNick Terrell  * Copyright (c) Yann Collet, Facebook, Inc.
5*e0c1b49fSNick Terrell  *
6*e0c1b49fSNick Terrell  *  You can contact the author at :
7*e0c1b49fSNick Terrell  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
8*e0c1b49fSNick Terrell  *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
9*e0c1b49fSNick Terrell  *
10*e0c1b49fSNick Terrell  * This source code is licensed under both the BSD-style license (found in the
11*e0c1b49fSNick Terrell  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12*e0c1b49fSNick Terrell  * in the COPYING file in the root directory of this source tree).
13*e0c1b49fSNick Terrell  * You may select, at your option, one of the above-listed licenses.
14*e0c1b49fSNick Terrell ****************************************************************** */
15*e0c1b49fSNick Terrell 
16*e0c1b49fSNick Terrell /* --- dependencies --- */
17*e0c1b49fSNick Terrell #include "../common/mem.h"             /* U32, BYTE, etc. */
18*e0c1b49fSNick Terrell #include "../common/debug.h"           /* assert, DEBUGLOG */
19*e0c1b49fSNick Terrell #include "../common/error_private.h"   /* ERROR */
20*e0c1b49fSNick Terrell #include "hist.h"
21*e0c1b49fSNick Terrell 
22*e0c1b49fSNick Terrell 
23*e0c1b49fSNick Terrell /* --- Error management --- */
HIST_isError(size_t code)24*e0c1b49fSNick Terrell unsigned HIST_isError(size_t code) { return ERR_isError(code); }
25*e0c1b49fSNick Terrell 
26*e0c1b49fSNick Terrell /*-**************************************************************
27*e0c1b49fSNick Terrell  *  Histogram functions
28*e0c1b49fSNick Terrell  ****************************************************************/
HIST_count_simple(unsigned * count,unsigned * maxSymbolValuePtr,const void * src,size_t srcSize)29*e0c1b49fSNick Terrell unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
30*e0c1b49fSNick Terrell                            const void* src, size_t srcSize)
31*e0c1b49fSNick Terrell {
32*e0c1b49fSNick Terrell     const BYTE* ip = (const BYTE*)src;
33*e0c1b49fSNick Terrell     const BYTE* const end = ip + srcSize;
34*e0c1b49fSNick Terrell     unsigned maxSymbolValue = *maxSymbolValuePtr;
35*e0c1b49fSNick Terrell     unsigned largestCount=0;
36*e0c1b49fSNick Terrell 
37*e0c1b49fSNick Terrell     ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
38*e0c1b49fSNick Terrell     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
39*e0c1b49fSNick Terrell 
40*e0c1b49fSNick Terrell     while (ip<end) {
41*e0c1b49fSNick Terrell         assert(*ip <= maxSymbolValue);
42*e0c1b49fSNick Terrell         count[*ip++]++;
43*e0c1b49fSNick Terrell     }
44*e0c1b49fSNick Terrell 
45*e0c1b49fSNick Terrell     while (!count[maxSymbolValue]) maxSymbolValue--;
46*e0c1b49fSNick Terrell     *maxSymbolValuePtr = maxSymbolValue;
47*e0c1b49fSNick Terrell 
48*e0c1b49fSNick Terrell     {   U32 s;
49*e0c1b49fSNick Terrell         for (s=0; s<=maxSymbolValue; s++)
50*e0c1b49fSNick Terrell             if (count[s] > largestCount) largestCount = count[s];
51*e0c1b49fSNick Terrell     }
52*e0c1b49fSNick Terrell 
53*e0c1b49fSNick Terrell     return largestCount;
54*e0c1b49fSNick Terrell }
55*e0c1b49fSNick Terrell 
56*e0c1b49fSNick Terrell typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
57*e0c1b49fSNick Terrell 
58*e0c1b49fSNick Terrell /* HIST_count_parallel_wksp() :
59*e0c1b49fSNick Terrell  * store histogram into 4 intermediate tables, recombined at the end.
60*e0c1b49fSNick Terrell  * this design makes better use of OoO cpus,
61*e0c1b49fSNick Terrell  * and is noticeably faster when some values are heavily repeated.
62*e0c1b49fSNick Terrell  * But it needs some additional workspace for intermediate tables.
63*e0c1b49fSNick Terrell  * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
64*e0c1b49fSNick Terrell  * @return : largest histogram frequency,
65*e0c1b49fSNick Terrell  *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
HIST_count_parallel_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,HIST_checkInput_e check,U32 * const workSpace)66*e0c1b49fSNick Terrell static size_t HIST_count_parallel_wksp(
67*e0c1b49fSNick Terrell                                 unsigned* count, unsigned* maxSymbolValuePtr,
68*e0c1b49fSNick Terrell                                 const void* source, size_t sourceSize,
69*e0c1b49fSNick Terrell                                 HIST_checkInput_e check,
70*e0c1b49fSNick Terrell                                 U32* const workSpace)
71*e0c1b49fSNick Terrell {
72*e0c1b49fSNick Terrell     const BYTE* ip = (const BYTE*)source;
73*e0c1b49fSNick Terrell     const BYTE* const iend = ip+sourceSize;
74*e0c1b49fSNick Terrell     size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
75*e0c1b49fSNick Terrell     unsigned max=0;
76*e0c1b49fSNick Terrell     U32* const Counting1 = workSpace;
77*e0c1b49fSNick Terrell     U32* const Counting2 = Counting1 + 256;
78*e0c1b49fSNick Terrell     U32* const Counting3 = Counting2 + 256;
79*e0c1b49fSNick Terrell     U32* const Counting4 = Counting3 + 256;
80*e0c1b49fSNick Terrell 
81*e0c1b49fSNick Terrell     /* safety checks */
82*e0c1b49fSNick Terrell     assert(*maxSymbolValuePtr <= 255);
83*e0c1b49fSNick Terrell     if (!sourceSize) {
84*e0c1b49fSNick Terrell         ZSTD_memset(count, 0, countSize);
85*e0c1b49fSNick Terrell         *maxSymbolValuePtr = 0;
86*e0c1b49fSNick Terrell         return 0;
87*e0c1b49fSNick Terrell     }
88*e0c1b49fSNick Terrell     ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
89*e0c1b49fSNick Terrell 
90*e0c1b49fSNick Terrell     /* by stripes of 16 bytes */
91*e0c1b49fSNick Terrell     {   U32 cached = MEM_read32(ip); ip += 4;
92*e0c1b49fSNick Terrell         while (ip < iend-15) {
93*e0c1b49fSNick Terrell             U32 c = cached; cached = MEM_read32(ip); ip += 4;
94*e0c1b49fSNick Terrell             Counting1[(BYTE) c     ]++;
95*e0c1b49fSNick Terrell             Counting2[(BYTE)(c>>8) ]++;
96*e0c1b49fSNick Terrell             Counting3[(BYTE)(c>>16)]++;
97*e0c1b49fSNick Terrell             Counting4[       c>>24 ]++;
98*e0c1b49fSNick Terrell             c = cached; cached = MEM_read32(ip); ip += 4;
99*e0c1b49fSNick Terrell             Counting1[(BYTE) c     ]++;
100*e0c1b49fSNick Terrell             Counting2[(BYTE)(c>>8) ]++;
101*e0c1b49fSNick Terrell             Counting3[(BYTE)(c>>16)]++;
102*e0c1b49fSNick Terrell             Counting4[       c>>24 ]++;
103*e0c1b49fSNick Terrell             c = cached; cached = MEM_read32(ip); ip += 4;
104*e0c1b49fSNick Terrell             Counting1[(BYTE) c     ]++;
105*e0c1b49fSNick Terrell             Counting2[(BYTE)(c>>8) ]++;
106*e0c1b49fSNick Terrell             Counting3[(BYTE)(c>>16)]++;
107*e0c1b49fSNick Terrell             Counting4[       c>>24 ]++;
108*e0c1b49fSNick Terrell             c = cached; cached = MEM_read32(ip); ip += 4;
109*e0c1b49fSNick Terrell             Counting1[(BYTE) c     ]++;
110*e0c1b49fSNick Terrell             Counting2[(BYTE)(c>>8) ]++;
111*e0c1b49fSNick Terrell             Counting3[(BYTE)(c>>16)]++;
112*e0c1b49fSNick Terrell             Counting4[       c>>24 ]++;
113*e0c1b49fSNick Terrell         }
114*e0c1b49fSNick Terrell         ip-=4;
115*e0c1b49fSNick Terrell     }
116*e0c1b49fSNick Terrell 
117*e0c1b49fSNick Terrell     /* finish last symbols */
118*e0c1b49fSNick Terrell     while (ip<iend) Counting1[*ip++]++;
119*e0c1b49fSNick Terrell 
120*e0c1b49fSNick Terrell     {   U32 s;
121*e0c1b49fSNick Terrell         for (s=0; s<256; s++) {
122*e0c1b49fSNick Terrell             Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
123*e0c1b49fSNick Terrell             if (Counting1[s] > max) max = Counting1[s];
124*e0c1b49fSNick Terrell     }   }
125*e0c1b49fSNick Terrell 
126*e0c1b49fSNick Terrell     {   unsigned maxSymbolValue = 255;
127*e0c1b49fSNick Terrell         while (!Counting1[maxSymbolValue]) maxSymbolValue--;
128*e0c1b49fSNick Terrell         if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
129*e0c1b49fSNick Terrell         *maxSymbolValuePtr = maxSymbolValue;
130*e0c1b49fSNick Terrell         ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
131*e0c1b49fSNick Terrell     }
132*e0c1b49fSNick Terrell     return (size_t)max;
133*e0c1b49fSNick Terrell }
134*e0c1b49fSNick Terrell 
135*e0c1b49fSNick Terrell /* HIST_countFast_wksp() :
136*e0c1b49fSNick Terrell  * Same as HIST_countFast(), but using an externally provided scratch buffer.
137*e0c1b49fSNick Terrell  * `workSpace` is a writable buffer which must be 4-bytes aligned,
138*e0c1b49fSNick Terrell  * `workSpaceSize` must be >= HIST_WKSP_SIZE
139*e0c1b49fSNick Terrell  */
HIST_countFast_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,void * workSpace,size_t workSpaceSize)140*e0c1b49fSNick Terrell size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
141*e0c1b49fSNick Terrell                           const void* source, size_t sourceSize,
142*e0c1b49fSNick Terrell                           void* workSpace, size_t workSpaceSize)
143*e0c1b49fSNick Terrell {
144*e0c1b49fSNick Terrell     if (sourceSize < 1500) /* heuristic threshold */
145*e0c1b49fSNick Terrell         return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
146*e0c1b49fSNick Terrell     if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
147*e0c1b49fSNick Terrell     if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
148*e0c1b49fSNick Terrell     return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
149*e0c1b49fSNick Terrell }
150*e0c1b49fSNick Terrell 
151*e0c1b49fSNick Terrell /* HIST_count_wksp() :
152*e0c1b49fSNick Terrell  * Same as HIST_count(), but using an externally provided scratch buffer.
153*e0c1b49fSNick Terrell  * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
HIST_count_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,void * workSpace,size_t workSpaceSize)154*e0c1b49fSNick Terrell size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
155*e0c1b49fSNick Terrell                        const void* source, size_t sourceSize,
156*e0c1b49fSNick Terrell                        void* workSpace, size_t workSpaceSize)
157*e0c1b49fSNick Terrell {
158*e0c1b49fSNick Terrell     if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
159*e0c1b49fSNick Terrell     if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
160*e0c1b49fSNick Terrell     if (*maxSymbolValuePtr < 255)
161*e0c1b49fSNick Terrell         return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
162*e0c1b49fSNick Terrell     *maxSymbolValuePtr = 255;
163*e0c1b49fSNick Terrell     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
164*e0c1b49fSNick Terrell }
165*e0c1b49fSNick Terrell 
166