Line data Source code
1 : /*
2 : Copyright 2015 Esri
3 :
4 : Licensed under the Apache License, Version 2.0 (the "License");
5 : you may not use this file except in compliance with the License.
6 : You may obtain a copy of the License at
7 :
8 : http://www.apache.org/licenses/LICENSE-2.0
9 :
10 : Unless required by applicable law or agreed to in writing, software
11 : distributed under the License is distributed on an "AS IS" BASIS,
12 : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 : See the License for the specific language governing permissions and
14 : limitations under the License.
15 :
16 : A local copy of the license and additional notices are located with the
17 : source distribution at:
18 :
19 : http://github.com/Esri/lerc/
20 :
21 : Contributors: Thomas Maurer
22 : */
23 :
24 : #ifndef HUFFMAN_H
25 : #define HUFFMAN_H
26 :
27 : #include <vector>
28 : #include <cstring>
29 : #include <utility>
30 : #include "Defines.h"
31 :
32 : NAMESPACE_LERC_START
33 :
34 : class Huffman
35 : {
36 : public:
37 7809 : Huffman() : m_maxHistoSize(1 << 15), m_maxNumBitsLUT(12), m_numBitsToSkipInTree(0), m_root(nullptr) {}
38 7809 : ~Huffman() { Clear(); }
39 :
40 : // Limitation: We limit the max Huffman code length to 32 bit. If this happens, the function ComputeCodes()
41 : // returns false. In that case don't use Huffman coding but Lerc only instead.
42 : // This won't happen easily. For the worst case input maximizing the Huffman code length the counts in the
43 : // histogram have to follow the Fibonacci sequence. Even then, for < 9,227,465 data values, 32 bit is
44 : // the max Huffman code length possible.
45 :
46 : bool ComputeCodes(const std::vector<int>& histo); // input histogram, size < 2^15
47 :
48 : bool ComputeCompressedSize(const std::vector<int>& histo, int& numBytes, double& avgBpp) const;
49 :
50 : // LUT of same size as histogram, each entry has length of Huffman bit code, and the bit code
51 3327 : const std::vector<std::pair<unsigned short, unsigned int> >& GetCodes() const { return m_codeTable; }
52 : bool SetCodes(const std::vector<std::pair<unsigned short, unsigned int> >& codeTable);
53 :
54 : bool WriteCodeTable(Byte** ppByte, int lerc2Version) const;
55 : bool ReadCodeTable(const Byte** ppByte, size_t& nBytesRemaining, int lerc2Version);
56 :
57 : bool BuildTreeFromCodes(int& numBitsLUT);
58 : bool DecodeOneValue(const unsigned int** ppSrc, size_t& nBytesRemaining, int& bitPos, int numBitsLUT, int& value) const;
59 : bool DecodeOneValue_NoOverrunCheck(const unsigned int** ppSrc, size_t& nBytesRemaining, int& bitPos, int numBitsLUT, int& value) const;
60 : void Clear();
61 :
62 : private:
63 :
64 : struct Node
65 : {
66 : int weight;
67 : short value;
68 : Node *child0, *child1;
69 :
70 124238 : Node(short val, int cnt) // new leaf node for val
71 124238 : {
72 124238 : value = val;
73 124238 : weight = -cnt;
74 124238 : child0 = child1 = nullptr;
75 124238 : }
76 :
77 120666 : Node(Node* c0, Node* c1) // new internal node from children c0 and c1
78 120666 : {
79 120666 : value = -1;
80 120666 : weight = c0->weight + c1->weight;
81 120666 : child0 = c0;
82 120666 : child1 = c1;
83 120666 : }
84 :
85 1472970 : bool operator < (const Node& other) const { return weight < other.weight; }
86 :
87 244863 : bool TreeToLUT(unsigned short numBits, unsigned int bits, std::vector<std::pair<unsigned short, unsigned int> >& luTable) const
88 : {
89 244863 : if (child0)
90 : {
91 241332 : if (numBits == 32 // the max huffman code length we allow
92 120666 : || !child0->TreeToLUT(numBits + 1, (bits << 1) + 0, luTable)
93 241332 : || !child1->TreeToLUT(numBits + 1, (bits << 1) + 1, luTable))
94 : {
95 0 : return false;
96 : }
97 : }
98 : else
99 124197 : luTable[value] = std::pair<unsigned short, unsigned int>(numBits, bits);
100 :
101 244863 : return true;
102 : }
103 :
104 247190 : void FreeTree(int& n)
105 : {
106 247190 : if (child0)
107 : {
108 121834 : child0->FreeTree(n);
109 121834 : delete child0;
110 121834 : child0 = nullptr;
111 121834 : n--;
112 : }
113 247190 : if (child1)
114 : {
115 121784 : child1->FreeTree(n);
116 121784 : delete child1;
117 121784 : child1 = nullptr;
118 121784 : n--;
119 : }
120 247190 : }
121 : };
122 :
123 : private:
124 :
125 : size_t m_maxHistoSize;
126 : std::vector<std::pair<unsigned short, unsigned int> > m_codeTable;
127 : std::vector<std::pair<short, short> > m_decodeLUT;
128 : int m_maxNumBitsLUT;
129 : int m_numBitsToSkipInTree;
130 : Node* m_root;
131 :
132 1606790 : static int GetIndexWrapAround(int i, int size) { return i - (i < size ? 0 : size); }
133 :
134 : bool ComputeNumBytesCodeTable(int& numBytes) const;
135 : bool GetRange(int& i0, int& i1, int& maxCodeLength) const;
136 : bool BitStuffCodes(Byte** ppByte, int i0, int i1) const;
137 : bool BitUnStuffCodes(const Byte** ppByte, size_t& nBytesRemaining, int i0, int i1);
138 : bool ConvertCodesToCanonical();
139 : void ClearTree();
140 : };
141 :
142 : // -------------------------------------------------------------------------- ;
143 :
144 12181 : inline bool Huffman::DecodeOneValue(const unsigned int** ppSrc, size_t& nBytesRemaining, int& bitPos, int numBitsLUT, int& value) const
145 : {
146 12181 : const size_t sizeUInt = sizeof(unsigned int);
147 :
148 12181 : if (!ppSrc || !(*ppSrc) || bitPos < 0 || bitPos >= 32 || nBytesRemaining < sizeUInt)
149 0 : return false;
150 :
151 : // first get the next (up to) 12 bits as a copy
152 12181 : int valTmp = ((**ppSrc) << bitPos) >> (32 - numBitsLUT);
153 12181 : if (32 - bitPos < numBitsLUT)
154 : {
155 2292 : if (nBytesRemaining < 2 * sizeUInt)
156 0 : return false;
157 :
158 2292 : valTmp |= (*(*ppSrc + 1)) >> (64 - bitPos - numBitsLUT);
159 : }
160 :
161 12181 : if (m_decodeLUT[valTmp].first >= 0) // if there, move the correct number of bits and done
162 : {
163 12181 : value = m_decodeLUT[valTmp].second;
164 12181 : bitPos += m_decodeLUT[valTmp].first;
165 12181 : if (bitPos >= 32)
166 : {
167 519 : bitPos -= 32;
168 519 : (*ppSrc)++;
169 519 : nBytesRemaining -= sizeUInt;
170 : }
171 12181 : return true;
172 : }
173 :
174 : // if not there, go through the tree (slower)
175 :
176 0 : if (!m_root)
177 0 : return false;
178 :
179 : // skip leading 0 bits before entering the tree
180 0 : bitPos += m_numBitsToSkipInTree;
181 0 : if (bitPos >= 32)
182 : {
183 0 : bitPos -= 32;
184 0 : (*ppSrc)++;
185 0 : nBytesRemaining -= sizeUInt;
186 : }
187 :
188 0 : const Node* node = m_root;
189 0 : value = -1;
190 0 : while (value < 0 && nBytesRemaining >= sizeUInt)
191 : {
192 0 : int bit = ((**ppSrc) << bitPos) >> 31;
193 0 : bitPos++;
194 0 : if (bitPos == 32)
195 : {
196 0 : bitPos = 0;
197 0 : (*ppSrc)++;
198 0 : nBytesRemaining -= sizeUInt;
199 : }
200 :
201 0 : node = bit ? node->child1 : node->child0;
202 0 : if (!node)
203 0 : return false;
204 :
205 0 : if (node->value >= 0) // reached a leaf node
206 0 : value = node->value;
207 : }
208 :
209 0 : return (value >= 0);
210 : }
211 :
212 : // -------------------------------------------------------------------------- ;
213 :
214 4873280 : inline bool Huffman::DecodeOneValue_NoOverrunCheck(const unsigned int** ppSrc, size_t& nBytesRemaining, int& bitPos, int numBitsLUT, int& value) const
215 : {
216 4873280 : const size_t sizeUInt = sizeof(unsigned int);
217 :
218 4873280 : if (!ppSrc || !(*ppSrc) || bitPos < 0 || bitPos >= 32)
219 0 : return false;
220 :
221 : // first get the next (up to) 12 bits as a copy
222 4874900 : int valTmp = ((**ppSrc) << bitPos) >> (32 - numBitsLUT);
223 4874900 : if (32 - bitPos < numBitsLUT)
224 : {
225 1329840 : valTmp |= (*(*ppSrc + 1)) >> (64 - bitPos - numBitsLUT);
226 : }
227 :
228 4874900 : if (m_decodeLUT[valTmp].first >= 0) // if there, move the correct number of bits and done
229 : {
230 4872490 : value = m_decodeLUT[valTmp].second;
231 4871850 : bitPos += m_decodeLUT[valTmp].first;
232 4871640 : if (bitPos >= 32)
233 : {
234 431648 : bitPos -= 32;
235 431648 : (*ppSrc)++;
236 431648 : nBytesRemaining -= sizeUInt;
237 : }
238 4871640 : return true;
239 : }
240 :
241 : // if not there, go through the tree (slower)
242 :
243 2254 : if (!m_root)
244 0 : return false;
245 :
246 : // skip leading 0 bits before entering the tree
247 2254 : bitPos += m_numBitsToSkipInTree;
248 2254 : if (bitPos >= 32)
249 : {
250 498 : bitPos -= 32;
251 498 : (*ppSrc)++;
252 498 : nBytesRemaining -= sizeUInt;
253 : }
254 :
255 2254 : const Node* node = m_root;
256 2254 : value = -1;
257 16084 : while (value < 0)
258 : {
259 13830 : int bit = ((**ppSrc) << bitPos) >> 31;
260 13830 : bitPos++;
261 13830 : if (bitPos == 32)
262 : {
263 432 : bitPos = 0;
264 432 : (*ppSrc)++;
265 432 : nBytesRemaining -= sizeUInt;
266 : }
267 :
268 13830 : node = bit ? node->child1 : node->child0;
269 13830 : if (!node)
270 0 : return false;
271 :
272 13830 : if (node->value >= 0) // reached a leaf node
273 2254 : value = node->value;
274 : }
275 :
276 2254 : return (value >= 0);
277 : }
278 :
279 : // -------------------------------------------------------------------------- ;
280 :
281 : NAMESPACE_LERC_END
282 : #endif
|