LCOV - code coverage report
Current view: top level - frmts/mrf - Packer_RLE.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 78 84 92.9 %
Date: 2024-05-04 12:52:34 Functions: 6 6 100.0 %

          Line data    Source code
       1             : /*
       2             : Copyright 2016-2021 Esri
       3             : Licensed under the Apache License, Version 2.0 (the "License");
       4             : you may not use this file except in compliance with the License.
       5             : You may obtain a copy of the License at
       6             : http://www.apache.org/licenses/LICENSE-2.0
       7             : Unless required by applicable law or agreed to in writing, software
       8             : distributed under the License is distributed on an "AS IS" BASIS,
       9             : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      10             : See the License for the specific language governing permissions and
      11             : limitations under the License.
      12             : 
      13             : Contributors:  Lucian Plesea
      14             : */
      15             : 
      16             : //
      17             : // Implements an RLE codec packer.  The RLE uses a dedicated marker code
      18             : // This particular packer picks the least used byte value as marker,
      19             : // which makes the worst case data expansion more reasonable, at the expense
      20             : // of taking two passes over the input data
      21             : //
      22             : 
      23             : // For memset
      24             : #include <cstring>
      25             : #include <vector>
      26             : #include <algorithm>
      27             : 
      28             : #include "marfa.h"
      29             : #include "Packer_RLE.h"
      30             : 
      31             : NAMESPACE_MRF_START
      32             : //
      33             : // RLE yarn codec, uses a dedicated code as marker, default value is 0xC3
      34             : // This is the C implementation, there is also a C++ one
      35             : // For yarnball compression, it performs better than using a double char marker
      36             : //
      37             : // Includes both encoder and decoder
      38             : // Worst case input:output ratio is 1:2, when the input is 1,2 or three marker
      39             : // codes For long input streams, the input:output ratio is between 13260.6:1
      40             : // (best) and 1:1.75 (worst)
      41             : //
      42             : // Could be a byte stream filter which needs very little local storage
      43             : //
      44             : 
      45             : constexpr int MAX_RUN = 768 + 0xffff;
      46             : typedef unsigned char Byte;
      47             : 
      48             : #define UC(X) static_cast<Byte>(X)
      49             : 
      50             : // Encode helper function
      51             : // It returns how many times the byte at *s is repeated
      52             : // a value between 1 and min(max_count, MAX_RUN)
      53        1340 : inline static int run_length(const Byte *s, int max_count)
      54             : {
      55        1340 :     if (max_count > MAX_RUN)
      56           0 :         max_count = MAX_RUN;
      57        1340 :     const Byte c = *s++;
      58      393387 :     for (int count = 1; count < max_count; count++)
      59      393375 :         if (c != *s++)
      60        1328 :             return count;
      61          12 :     return max_count;
      62             : }
      63             : 
      64             : #define RET_NOW                                                                \
      65             :     do                                                                         \
      66             :     {                                                                          \
      67             :         return static_cast<size_t>(next - reinterpret_cast<Byte *>(obuf));     \
      68             :     } while (0)
      69             : 
      70             : //
      71             : // C compress function, returns compressed size
      72             : // len is the size of the input buffer
      73             : // caller should ensure that output buffer is at least 2 * N to be safe,
      74             : // dropping to N * 7/4 for larger input
      75             : // If the Code is chosen to be the least present value in input, the
      76             : // output size requirement is bound by N / 256 + N
      77             : //
      78          12 : static size_t toYarn(const char *ibuffer, char *obuf, size_t len,
      79             :                      Byte CODE = 0xC3)
      80             : {
      81          12 :     Byte *next = reinterpret_cast<Byte *>(obuf);
      82             : 
      83        1352 :     while (len)
      84             :     {
      85        1340 :         Byte b = static_cast<Byte>(*ibuffer);
      86        1340 :         int run = run_length(reinterpret_cast<const Byte *>(ibuffer),
      87             :                              static_cast<int>(len));
      88        1340 :         if (run < 4)
      89             :         {  // Encoded as single bytes, stored as such, CODE followed by a zero
      90         482 :             run = 1;
      91         482 :             *next++ = b;
      92         482 :             if (CODE == b)
      93           0 :                 *next++ = 0;
      94             :         }
      95             :         else
      96             :         {                    // Encoded as a sequence
      97         858 :             *next++ = CODE;  // Start with Marker code, always present
      98             : 
      99         858 :             if (run > 767)
     100             :             {                    // long sequence
     101          12 :                 ibuffer += 768;  // May be unsafe to read *ibuffer
     102          12 :                 len -= 768;
     103          12 :                 run -= 768;
     104          12 :                 *next++ = 3;
     105          12 :                 *next++ = UC(run >> 8);  // Forced high count
     106             :             }
     107         846 :             else if (run > 255)
     108             :             {                            // Between 256 and 767, medium sequence
     109         292 :                 *next++ = UC(run >> 8);  // High count
     110             :             }
     111             : 
     112         858 :             *next++ = UC(run & 0xff);  // Low count, always present
     113         858 :             *next++ = b;               // End with Value, always present
     114             :         }
     115        1340 :         ibuffer += run;
     116        1340 :         len -= run;
     117             :     }
     118          12 :     RET_NOW;
     119             : }
     120             : 
     121             : // Check that another input byte can be read, return now otherwise
     122             : // Adjusts the input length too
     123             : #define CHECK_INPUT                                                            \
     124             :     if (ilen == 0)                                                             \
     125             :     RET_NOW
     126             : // Reads a byte and adjust the input pointer
     127             : #define NEXT_BYTE UC(*ibuffer++)
     128             : 
     129             : //
     130             : // C decompress function, returns actual decompressed size
     131             : // Stops when either olen is reached or when ilen is exhausted
     132             : // returns the number of output bytes written
     133             : //
     134           8 : static size_t fromYarn(const char *ibuffer, size_t ilen, char *obuf,
     135             :                        size_t olen, Byte CODE = 0xC3)
     136             : {
     137           8 :     Byte *next = reinterpret_cast<Byte *>(obuf);
     138         799 :     while (ilen > 0 && olen > 0)
     139             :     {
     140             :         // It is safe to read and write one byte
     141         791 :         Byte b = NEXT_BYTE;
     142         791 :         ilen--;
     143         791 :         if (b != CODE)
     144             :         {  // Copy single chars
     145         314 :             *next++ = b;
     146         314 :             olen--;
     147             :         }
     148             :         else
     149             :         {  // Marker found, which type of sequence is it?
     150         477 :             CHECK_INPUT;
     151         477 :             b = NEXT_BYTE;
     152         477 :             ilen--;
     153         477 :             if (b == 0)
     154             :             {  // Emit one code
     155           0 :                 *next++ = CODE;
     156           0 :                 olen--;
     157             :             }
     158             :             else
     159             :             {  // Sequence
     160         477 :                 size_t run = 0;
     161         477 :                 if (b < 4)
     162             :                 {
     163         164 :                     run = 256 * b;
     164         164 :                     if (3 == b)
     165             :                     {  // Second byte of high count
     166           8 :                         CHECK_INPUT;
     167           8 :                         run += 256 * NEXT_BYTE;
     168           8 :                         ilen--;
     169             :                     }
     170         164 :                     CHECK_INPUT;
     171         164 :                     run += NEXT_BYTE;
     172         164 :                     ilen--;
     173             :                 }
     174             :                 else
     175             :                 {  // Single byte count
     176         313 :                     run = b;
     177             :                 }
     178             : 
     179             :                 // Write the sequence out, after checking
     180         477 :                 if (olen < run)
     181           0 :                     RET_NOW;
     182         477 :                 CHECK_INPUT;
     183         477 :                 b = NEXT_BYTE;
     184         477 :                 ilen--;
     185         477 :                 memset(next, b, run);
     186             : 
     187         477 :                 next += run;
     188         477 :                 olen -= run;
     189             :             }
     190             :         }
     191             :     }
     192           8 :     RET_NOW;
     193             : }
     194             : 
     195             : // Returns the least used byte value from a buffer
     196          12 : static Byte getLeastUsed(const Byte *src, size_t len)
     197             : {
     198          12 :     std::vector<unsigned int> hist(256, 0);
     199      393228 :     while (len)
     200             :     {
     201      393216 :         --len;
     202      393216 :         hist[*src++]++;
     203             :     }
     204             :     const size_t nIdxMin =
     205          12 :         std::min_element(hist.begin(), hist.end()) - hist.begin();
     206          24 :     return UC(nIdxMin);
     207             : }
     208             : 
     209             : // Read from a packed source until the src is exhausted
     210             : // Returns true if all output buffer was filled, 0 otherwise
     211           8 : int RLEC3Packer::load(storage_manager *src, storage_manager *dst)
     212             : {
     213             :     // Use the first char in the input buffer as marker code
     214          16 :     return dst->size == fromYarn(src->buffer + 1, src->size - 1, dst->buffer,
     215           8 :                                  dst->size, *src->buffer);
     216             : }
     217             : 
     218             : //
     219             : // Picks the least use value as the marker code and stores it first in the data
     220             : // This choice improves the worst case expansion, which becomes (1 + N / 256 +
     221             : // N) : N It also improves compression in general Makes best case marginally
     222             : // worse because the chosen code adds one byte to the output
     223             : //
     224             : 
     225          12 : int RLEC3Packer::store(storage_manager *src, storage_manager *dst)
     226             : {
     227          12 :     size_t N = src->size;
     228          12 :     if (dst->size < 1 + N + N / 256)
     229           0 :         return 0;  // Failed, destination might overflow
     230             :     Byte c =
     231          12 :         getLeastUsed(reinterpret_cast<const Byte *>(src->buffer), src->size);
     232          12 :     *dst->buffer++ = static_cast<char>(c);
     233          12 :     dst->size = 1 + toYarn(src->buffer, dst->buffer, src->size, c);
     234          12 :     return 1;  // Success, size is in dst->size
     235             : }
     236             : 
     237             : NAMESPACE_MRF_END

Generated by: LCOV version 1.14