Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: Raster Matrix Format
4 : * Purpose: Implementation of the LZW compression algorithm as used in
5 : * GIS "Panorama"/"Integratsia" raster files. Based on implementation
6 : * of Kent Williams, but heavily modified over it. The key point
7 : * in the initial implementation is a hashing algorithm.
8 : * Author: Andrey Kiselev, dron@ak4719.spb.edu
9 : *
10 : ******************************************************************************
11 : * Copyright (c) 2007, Andrey Kiselev <dron@ak4719.spb.edu>
12 : *
13 : * Permission is hereby granted, free of charge, to any person obtaining a
14 : * copy of this software and associated documentation files (the "Software"),
15 : * to deal in the Software without restriction, including without limitation
16 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
17 : * and/or sell copies of the Software, and to permit persons to whom the
18 : * Software is furnished to do so, subject to the following conditions:
19 : *
20 : * The above copyright notice and this permission notice shall be included
21 : * in all copies or substantial portions of the Software.
22 : *
23 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29 : * DEALINGS IN THE SOFTWARE.
30 : ******************************************************************************
31 : * COPYRIGHT NOTICE FROM THE INITIAL IMPLEMENTATION:
32 : *
33 : * The programs LZWCOM and LZWUNC, both in binary executable and source forms,
34 : * are in the public domain. No warranty is given or implied, and no
35 : * liability will be assumed by the author.
36 : *
37 : * Everyone on earth is hereby given permission to use, copy, distribute,
38 : * change, mangle, destroy or otherwise employ these programs, provided they
39 : * hurt no one but themselves in the process.
40 : *
41 : * Kent Williams
42 : * Norand Inc.
43 : * 550 2nd St S.E.
44 : * Cedar Rapids, Iowa 52401
45 : * (319) 369-3131
46 : ****************************************************************************/
47 :
48 : #include "cpl_conv.h"
49 :
50 : #include "rmfdataset.h"
51 :
52 : // Code marks that there is no predecessor in the string
53 : constexpr GUInt32 NO_PRED = 0xFFFF;
54 :
55 : // We are using 12-bit codes in this particular implementation
56 : constexpr GUInt32 TABSIZE = 4096U;
57 : constexpr GUInt32 STACKSIZE = TABSIZE;
58 :
59 : constexpr GUInt32 NOT_FND = 0xFFFF;
60 :
61 : /************************************************************************/
62 : /* LZWStringTab */
63 : /************************************************************************/
64 :
65 : typedef struct
66 : {
67 : bool bUsed;
68 : GUInt32 iNext; // hi bit is 'used' flag
69 : GUInt32 iPredecessor; // 12 bit code
70 : GByte iFollower;
71 : } LZWStringTab;
72 :
73 : /************************************************************************/
74 : /* LZWUpdateTab() */
75 : /************************************************************************/
76 :
77 : CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW
78 16751100 : static GUInt32 UnsanitizedMul(GUInt32 a, GUInt32 b)
79 : {
80 16751100 : return a * b;
81 : }
82 :
83 17072400 : static int UnsignedByteToSignedByte(GByte byVal)
84 : {
85 17072400 : return byVal >= 128 ? byVal - 256 : byVal;
86 : }
87 :
88 979120 : static void LZWUpdateTab(LZWStringTab *poCodeTab, GUInt32 iPred, GByte bFollow)
89 : {
90 : /* -------------------------------------------------------------------- */
91 : /* Hash uses the 'mid-square' algorithm. I.E. for a hash val of n bits */
92 : /* hash = middle binary digits of (key * key). Upon collision, hash */
93 : /* searches down linked list of keys that hashed to that key already. */
94 : /* It will NOT notice if the table is full. This must be handled */
95 : /* elsewhere */
96 : /* -------------------------------------------------------------------- */
97 979120 : const int iFollow = UnsignedByteToSignedByte(bFollow);
98 979685 : GUInt32 nLocal = CPLUnsanitizedAdd<GUInt32>(iPred, iFollow) | 0x0800;
99 979539 : nLocal = (UnsanitizedMul(nLocal, nLocal) >> 6) &
100 : 0x0FFF; // middle 12 bits of result
101 :
102 : // If string is not used
103 979297 : GUInt32 nNext = nLocal;
104 979297 : if (poCodeTab[nLocal].bUsed)
105 : {
106 : // If collision has occurred
107 1918490 : while ((nNext = poCodeTab[nLocal].iNext) != 0)
108 1256440 : nLocal = nNext;
109 :
110 : // Search for free entry from nLocal + 101
111 662049 : nNext = (nLocal + 101) & 0x0FFF;
112 33685600 : while (poCodeTab[nNext].bUsed)
113 : {
114 33023600 : if (++nNext >= TABSIZE)
115 2788 : nNext = 0;
116 : }
117 :
118 : // Put new tempnext into last element in collision list
119 662049 : poCodeTab[nLocal].iNext = nNext;
120 : }
121 :
122 979297 : poCodeTab[nNext].bUsed = true;
123 979297 : poCodeTab[nNext].iNext = 0;
124 979297 : poCodeTab[nNext].iPredecessor = iPred;
125 979297 : poCodeTab[nNext].iFollower = bFollow;
126 979297 : }
127 :
128 : /************************************************************************/
129 : /* LZWCreateTab() */
130 : /************************************************************************/
131 :
132 261 : static LZWStringTab *LZWCreateTab()
133 : {
134 : // Allocate space for the new table and pre-fill it
135 : LZWStringTab *poCodeTab =
136 261 : (LZWStringTab *)CPLMalloc(TABSIZE * sizeof(LZWStringTab));
137 :
138 261 : memset(poCodeTab, 0, TABSIZE * sizeof(LZWStringTab));
139 :
140 67077 : for (GUInt32 iCode = 0; iCode < 256; ++iCode)
141 66816 : LZWUpdateTab(poCodeTab, NO_PRED, static_cast<GByte>(iCode));
142 :
143 261 : return poCodeTab;
144 : }
145 :
146 : /************************************************************************/
147 : /* LZWFindIndex() */
148 : /************************************************************************/
149 :
150 15774900 : static GUInt32 LZWFindIndex(const LZWStringTab *poCodeTab, GUInt32 iPred,
151 : GByte bFollow)
152 : {
153 15774900 : const int iFollow = UnsignedByteToSignedByte(bFollow);
154 16467600 : GUInt32 nLocal = CPLUnsanitizedAdd<GUInt32>(iPred, iFollow) | 0x0800;
155 16406500 : nLocal = (UnsanitizedMul(nLocal, nLocal) >> 6) &
156 : 0x0FFF; // middle 12 bits of result
157 :
158 23281800 : do
159 : {
160 32911300 : CPLAssert(nLocal < TABSIZE);
161 32911300 : if (poCodeTab[nLocal].iPredecessor == iPred &&
162 12993100 : poCodeTab[nLocal].iFollower == bFollow)
163 : {
164 13114200 : return nLocal;
165 : }
166 19797100 : nLocal = poCodeTab[nLocal].iNext;
167 19797100 : } while (nLocal > 0);
168 :
169 0 : return NOT_FND;
170 : }
171 :
172 : /************************************************************************/
173 : /* LZWPutCode() */
174 : /************************************************************************/
175 :
176 4879060 : static bool LZWPutCode(GUInt32 iCode, GUInt32 &iTmp, bool &bBitsleft,
177 : GByte *&pabyCurrent, const GByte *const pabyOutEnd)
178 : {
179 4879060 : if (bBitsleft)
180 : {
181 2597570 : if (pabyCurrent >= pabyOutEnd)
182 : {
183 0 : return false;
184 : }
185 2597570 : *(pabyCurrent++) = static_cast<GByte>((iCode >> 4) & 0xFF);
186 2597570 : iTmp = iCode & 0x000F;
187 2597570 : bBitsleft = false;
188 : }
189 : else
190 : {
191 2281500 : if (pabyCurrent + 1 >= pabyOutEnd)
192 : {
193 28 : return false;
194 : }
195 2281470 : *(pabyCurrent++) =
196 2281470 : static_cast<GByte>(((iTmp << 4) & 0xFF0) + ((iCode >> 8) & 0x00F));
197 2281470 : *(pabyCurrent++) = static_cast<GByte>(iCode & 0xFF);
198 2281470 : bBitsleft = true;
199 : }
200 4879040 : return true;
201 : }
202 :
203 : /************************************************************************/
204 : /* LZWReadStream() */
205 : /************************************************************************/
206 :
207 137 : static size_t LZWReadStream(const GByte *pabyIn, GUInt32 nSizeIn,
208 : GByte *pabyOut, GUInt32 nSizeOut,
209 : LZWStringTab *poCodeTab)
210 : {
211 137 : GByte *const pabyOutBegin = pabyOut;
212 :
213 : // The first code is always known
214 137 : GUInt32 iCode = (*pabyIn++ << 4) & 0xFF0;
215 137 : nSizeIn--;
216 137 : iCode += (*pabyIn >> 4) & 0x00F;
217 137 : GUInt32 iOldCode = iCode;
218 137 : bool bBitsleft = true;
219 :
220 137 : GByte iFinChar = poCodeTab[iCode].iFollower;
221 137 : nSizeOut--;
222 137 : *pabyOut++ = iFinChar;
223 :
224 137 : GUInt32 nCount = TABSIZE - 256;
225 :
226 : // Decompress the input buffer
227 4783380 : while (nSizeIn > 0)
228 : {
229 : // Fetch 12-bit code from input stream
230 4783300 : if (bBitsleft)
231 : {
232 2391720 : iCode = ((*pabyIn++ & 0x0F) << 8) & 0xF00;
233 2391720 : nSizeIn--;
234 2391720 : if (nSizeIn == 0)
235 57 : break;
236 2391660 : iCode += *pabyIn++;
237 2391660 : nSizeIn--;
238 2391660 : bBitsleft = FALSE;
239 : }
240 : else
241 : {
242 2391580 : iCode = (*pabyIn++ << 4) & 0xFF0;
243 2391580 : nSizeIn--;
244 2391580 : if (nSizeIn == 0)
245 0 : break;
246 2391580 : iCode += (*pabyIn >> 4) & 0x00F;
247 2391580 : bBitsleft = TRUE;
248 : }
249 :
250 4783250 : const GUInt32 iInCode = iCode;
251 4783250 : GByte bLastChar = 0; // TODO(schwehr): Why not nLastChar?
252 :
253 : // Do we have unknown code?
254 4783250 : bool bNewCode = false;
255 4783250 : if (!poCodeTab[iCode].bUsed)
256 : {
257 40101 : iCode = iOldCode;
258 40101 : bLastChar = iFinChar;
259 40101 : bNewCode = true;
260 : }
261 :
262 4783250 : GByte abyStack[STACKSIZE] = {};
263 4783250 : GByte *pabyTail = abyStack + STACKSIZE;
264 4783250 : GUInt32 nStackCount = 0;
265 :
266 16371000 : while (poCodeTab[iCode].iPredecessor != NO_PRED)
267 : {
268 : // Stack overrun
269 11587700 : if (nStackCount >= STACKSIZE)
270 0 : return 0;
271 : // Put the decoded character into stack
272 11587700 : *(--pabyTail) = poCodeTab[iCode].iFollower;
273 11587700 : nStackCount++;
274 11587700 : iCode = poCodeTab[iCode].iPredecessor;
275 : }
276 :
277 4783250 : if (!nSizeOut)
278 0 : return 0;
279 : // The first character
280 4783250 : iFinChar = poCodeTab[iCode].iFollower;
281 4783250 : nSizeOut--;
282 4783250 : *pabyOut++ = iFinChar;
283 :
284 : // Output buffer overrun
285 4783250 : if (nStackCount > nSizeOut)
286 0 : return 0;
287 :
288 : // Now copy the stack contents into output buffer. Our stack was
289 : // filled in reverse order, so no need in character reordering
290 4783250 : memcpy(pabyOut, pabyTail, nStackCount);
291 4783250 : nSizeOut -= nStackCount;
292 4783250 : pabyOut += nStackCount;
293 :
294 : // If code isn't known
295 4783250 : if (bNewCode)
296 : {
297 : // Output buffer overrun
298 40101 : if (!nSizeOut)
299 0 : return 0;
300 40101 : iFinChar = bLastChar; // the follower char of last
301 40101 : *pabyOut++ = iFinChar;
302 40101 : nSizeOut--;
303 : }
304 :
305 4783250 : if (nCount > 0)
306 : {
307 463213 : nCount--;
308 : // Add code to the table
309 463213 : LZWUpdateTab(poCodeTab, iOldCode, iFinChar);
310 : }
311 :
312 4783250 : iOldCode = iInCode;
313 : }
314 :
315 137 : return static_cast<size_t>(pabyOut - pabyOutBegin);
316 : }
317 :
318 : /************************************************************************/
319 : /* LZWDecompress() */
320 : /************************************************************************/
321 :
322 137 : size_t RMFDataset::LZWDecompress(const GByte *pabyIn, GUInt32 nSizeIn,
323 : GByte *pabyOut, GUInt32 nSizeOut, GUInt32,
324 : GUInt32)
325 : {
326 137 : if (pabyIn == nullptr || pabyOut == nullptr || nSizeIn < 2)
327 0 : return 0;
328 137 : LZWStringTab *poCodeTab = LZWCreateTab();
329 :
330 137 : size_t nRet = LZWReadStream(pabyIn, nSizeIn, pabyOut, nSizeOut, poCodeTab);
331 :
332 137 : CPLFree(poCodeTab);
333 :
334 137 : return nRet;
335 : }
336 :
337 : /************************************************************************/
338 : /* LZWWriteStream() */
339 : /************************************************************************/
340 :
341 124 : static size_t LZWWriteStream(const GByte *pabyIn, GUInt32 nSizeIn,
342 : GByte *pabyOut, GUInt32 nSizeOut,
343 : LZWStringTab *poCodeTab)
344 : {
345 : GUInt32 iCode;
346 124 : iCode = LZWFindIndex(poCodeTab, NO_PRED, *pabyIn++);
347 124 : nSizeIn--;
348 :
349 124 : GUInt32 nCount = TABSIZE - 256;
350 124 : GUInt32 iTmp = 0;
351 124 : bool bBitsleft = true;
352 124 : GByte *pabyCurrent = pabyOut;
353 124 : GByte *pabyOutEnd = pabyOut + nSizeOut;
354 :
355 13168100 : while (nSizeIn > 0)
356 : {
357 13168000 : const GByte bCurrentCode = *pabyIn++;
358 13168000 : nSizeIn--;
359 :
360 13168000 : GUInt32 iNextCode = LZWFindIndex(poCodeTab, iCode, bCurrentCode);
361 12986900 : if (iNextCode != NOT_FND)
362 : {
363 9271420 : iCode = iNextCode;
364 9271420 : continue;
365 : }
366 :
367 3715430 : if (!LZWPutCode(iCode, iTmp, bBitsleft, pabyCurrent, pabyOutEnd))
368 : {
369 28 : return 0;
370 : }
371 :
372 4960970 : if (nCount > 0)
373 : {
374 449292 : nCount--;
375 449292 : LZWUpdateTab(poCodeTab, iCode, bCurrentCode);
376 : }
377 :
378 4961390 : iCode = LZWFindIndex(poCodeTab, NO_PRED, bCurrentCode);
379 : }
380 :
381 96 : if (!LZWPutCode(iCode, iTmp, bBitsleft, pabyCurrent, pabyOutEnd))
382 : {
383 0 : return 0;
384 : }
385 :
386 96 : if (!bBitsleft)
387 : {
388 52 : if (pabyCurrent >= pabyOutEnd)
389 : {
390 0 : return 0;
391 : }
392 52 : *(pabyCurrent++) = static_cast<GByte>((iTmp << 4) & 0xFF0);
393 : }
394 :
395 96 : return static_cast<size_t>(pabyCurrent - pabyOut);
396 : }
397 :
398 : /************************************************************************/
399 : /* LZWCompress() */
400 : /************************************************************************/
401 :
402 124 : size_t RMFDataset::LZWCompress(const GByte *pabyIn, GUInt32 nSizeIn,
403 : GByte *pabyOut, GUInt32 nSizeOut, GUInt32,
404 : GUInt32, const RMFDataset *)
405 : {
406 124 : if (pabyIn == nullptr || pabyOut == nullptr || nSizeIn == 0)
407 0 : return 0;
408 :
409 : // Allocate space for the new table and pre-fill it
410 124 : LZWStringTab *poCodeTab = LZWCreateTab();
411 :
412 : size_t nWritten =
413 124 : LZWWriteStream(pabyIn, nSizeIn, pabyOut, nSizeOut, poCodeTab);
414 :
415 124 : CPLFree(poCodeTab);
416 :
417 124 : return nWritten;
418 : }
|