LCOV - code coverage report
Current view: top level - frmts/mrf - Packer_RLE.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 78 84 92.9 %
Date: 2025-08-01 10:10:57 Functions: 6 6 100.0 %

          Line data    Source code
       1             : /*
       2             : Copyright 2016-2021 Esri
       3             : Licensed under the Apache License, Version 2.0 (the "License");
       4             : you may not use this file except in compliance with the License.
       5             : You may obtain a copy of the License at
       6             : http://www.apache.org/licenses/LICENSE-2.0
       7             : Unless required by applicable law or agreed to in writing, software
       8             : distributed under the License is distributed on an "AS IS" BASIS,
       9             : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      10             : See the License for the specific language governing permissions and
      11             : limitations under the License.
      12             : 
      13             : Contributors:  Lucian Plesea
      14             : */
      15             : 
      16             : //
      17             : // Implements an RLE codec packer.  The RLE uses a dedicated marker code
      18             : // This particular packer picks the least used byte value as marker,
      19             : // which makes the worst case data expansion more reasonable, at the expense
      20             : // of taking two passes over the input data
      21             : //
      22             : 
      23             : // For memset
      24             : #include <cstring>
      25             : #include <vector>
      26             : #include <algorithm>
      27             : 
      28             : #include "marfa.h"
      29             : #include "Packer_RLE.h"
      30             : 
      31             : NAMESPACE_MRF_START
      32             : 
      33             : Packer::~Packer() = default;
      34             : 
      35             : //
      36             : // RLE yarn codec, uses a dedicated code as marker, default value is 0xC3
      37             : // This is the C implementation, there is also a C++ one
      38             : // For yarnball compression, it performs better than using a double char marker
      39             : //
      40             : // Includes both encoder and decoder
      41             : // Worst case input:output ratio is 1:2, when the input is 1,2 or three marker
      42             : // codes For long input streams, the input:output ratio is between 13260.6:1
      43             : // (best) and 1:1.75 (worst)
      44             : //
      45             : // Could be a byte stream filter which needs very little local storage
      46             : //
      47             : 
      48             : constexpr int MAX_RUN = 768 + 0xffff;
      49             : typedef unsigned char Byte;
      50             : 
      51             : #define UC(X) static_cast<Byte>(X)
      52             : 
      53             : // Encode helper function
      54             : // It returns how many times the byte at *s is repeated
      55             : // a value between 1 and min(max_count, MAX_RUN)
      56        1340 : inline static int run_length(const Byte *s, int max_count)
      57             : {
      58        1340 :     if (max_count > MAX_RUN)
      59           0 :         max_count = MAX_RUN;
      60        1340 :     const Byte c = *s++;
      61      393387 :     for (int count = 1; count < max_count; count++)
      62      393375 :         if (c != *s++)
      63        1328 :             return count;
      64          12 :     return max_count;
      65             : }
      66             : 
      67             : #define RET_NOW                                                                \
      68             :     do                                                                         \
      69             :     {                                                                          \
      70             :         return static_cast<size_t>(next - reinterpret_cast<Byte *>(obuf));     \
      71             :     } while (0)
      72             : 
      73             : //
      74             : // C compress function, returns compressed size
      75             : // len is the size of the input buffer
      76             : // caller should ensure that output buffer is at least 2 * N to be safe,
      77             : // dropping to N * 7/4 for larger input
      78             : // If the Code is chosen to be the least present value in input, the
      79             : // output size requirement is bound by N / 256 + N
      80             : //
      81          12 : static size_t toYarn(const char *ibuffer, char *obuf, size_t len,
      82             :                      Byte CODE = 0xC3)
      83             : {
      84          12 :     Byte *next = reinterpret_cast<Byte *>(obuf);
      85             : 
      86        1352 :     while (len)
      87             :     {
      88        1340 :         Byte b = static_cast<Byte>(*ibuffer);
      89        1340 :         int run = run_length(reinterpret_cast<const Byte *>(ibuffer),
      90             :                              static_cast<int>(len));
      91        1340 :         if (run < 4)
      92             :         {  // Encoded as single bytes, stored as such, CODE followed by a zero
      93         482 :             run = 1;
      94         482 :             *next++ = b;
      95         482 :             if (CODE == b)
      96           0 :                 *next++ = 0;
      97             :         }
      98             :         else
      99             :         {                    // Encoded as a sequence
     100         858 :             *next++ = CODE;  // Start with Marker code, always present
     101             : 
     102         858 :             if (run > 767)
     103             :             {                    // long sequence
     104          12 :                 ibuffer += 768;  // May be unsafe to read *ibuffer
     105          12 :                 len -= 768;
     106          12 :                 run -= 768;
     107          12 :                 *next++ = 3;
     108          12 :                 *next++ = UC(run >> 8);  // Forced high count
     109             :             }
     110         846 :             else if (run > 255)
     111             :             {                            // Between 256 and 767, medium sequence
     112         292 :                 *next++ = UC(run >> 8);  // High count
     113             :             }
     114             : 
     115         858 :             *next++ = UC(run & 0xff);  // Low count, always present
     116         858 :             *next++ = b;               // End with Value, always present
     117             :         }
     118        1340 :         ibuffer += run;
     119        1340 :         len -= run;
     120             :     }
     121          12 :     RET_NOW;
     122             : }
     123             : 
     124             : // Check that another input byte can be read, return now otherwise
     125             : // Adjusts the input length too
     126             : #define CHECK_INPUT                                                            \
     127             :     if (ilen == 0)                                                             \
     128             :     RET_NOW
     129             : // Reads a byte and adjust the input pointer
     130             : #define NEXT_BYTE UC(*ibuffer++)
     131             : 
     132             : //
     133             : // C decompress function, returns actual decompressed size
     134             : // Stops when either olen is reached or when ilen is exhausted
     135             : // returns the number of output bytes written
     136             : //
     137           8 : static size_t fromYarn(const char *ibuffer, size_t ilen, char *obuf,
     138             :                        size_t olen, Byte CODE = 0xC3)
     139             : {
     140           8 :     Byte *next = reinterpret_cast<Byte *>(obuf);
     141         799 :     while (ilen > 0 && olen > 0)
     142             :     {
     143             :         // It is safe to read and write one byte
     144         791 :         Byte b = NEXT_BYTE;
     145         791 :         ilen--;
     146         791 :         if (b != CODE)
     147             :         {  // Copy single chars
     148         314 :             *next++ = b;
     149         314 :             olen--;
     150             :         }
     151             :         else
     152             :         {  // Marker found, which type of sequence is it?
     153         477 :             CHECK_INPUT;
     154         477 :             b = NEXT_BYTE;
     155         477 :             ilen--;
     156         477 :             if (b == 0)
     157             :             {  // Emit one code
     158           0 :                 *next++ = CODE;
     159           0 :                 olen--;
     160             :             }
     161             :             else
     162             :             {  // Sequence
     163         477 :                 size_t run = 0;
     164         477 :                 if (b < 4)
     165             :                 {
     166         164 :                     run = 256 * b;
     167         164 :                     if (3 == b)
     168             :                     {  // Second byte of high count
     169           8 :                         CHECK_INPUT;
     170           8 :                         run += 256 * NEXT_BYTE;
     171           8 :                         ilen--;
     172             :                     }
     173         164 :                     CHECK_INPUT;
     174         164 :                     run += NEXT_BYTE;
     175         164 :                     ilen--;
     176             :                 }
     177             :                 else
     178             :                 {  // Single byte count
     179         313 :                     run = b;
     180             :                 }
     181             : 
     182             :                 // Write the sequence out, after checking
     183         477 :                 if (olen < run)
     184           0 :                     RET_NOW;
     185         477 :                 CHECK_INPUT;
     186         477 :                 b = NEXT_BYTE;
     187         477 :                 ilen--;
     188         477 :                 memset(next, b, run);
     189             : 
     190         477 :                 next += run;
     191         477 :                 olen -= run;
     192             :             }
     193             :         }
     194             :     }
     195           8 :     RET_NOW;
     196             : }
     197             : 
     198             : // Returns the least used byte value from a buffer
     199          12 : static Byte getLeastUsed(const Byte *src, size_t len)
     200             : {
     201          12 :     std::vector<unsigned int> hist(256, 0);
     202      393228 :     while (len)
     203             :     {
     204      393216 :         --len;
     205      393216 :         hist[*src++]++;
     206             :     }
     207             :     const size_t nIdxMin =
     208          12 :         std::min_element(hist.begin(), hist.end()) - hist.begin();
     209          24 :     return UC(nIdxMin);
     210             : }
     211             : 
     212             : // Read from a packed source until the src is exhausted
     213             : // Returns true if all output buffer was filled, 0 otherwise
     214           8 : int RLEC3Packer::load(storage_manager *src, storage_manager *dst)
     215             : {
     216             :     // Use the first char in the input buffer as marker code
     217          16 :     return dst->size == fromYarn(src->buffer + 1, src->size - 1, dst->buffer,
     218           8 :                                  dst->size, *src->buffer);
     219             : }
     220             : 
     221             : //
     222             : // Picks the least use value as the marker code and stores it first in the data
     223             : // This choice improves the worst case expansion, which becomes (1 + N / 256 +
     224             : // N) : N It also improves compression in general Makes best case marginally
     225             : // worse because the chosen code adds one byte to the output
     226             : //
     227             : 
     228          12 : int RLEC3Packer::store(storage_manager *src, storage_manager *dst)
     229             : {
     230          12 :     size_t N = src->size;
     231          12 :     if (dst->size < 1 + N + N / 256)
     232           0 :         return 0;  // Failed, destination might overflow
     233             :     Byte c =
     234          12 :         getLeastUsed(reinterpret_cast<const Byte *>(src->buffer), src->size);
     235          12 :     *dst->buffer++ = static_cast<char>(c);
     236          12 :     dst->size = 1 + toYarn(src->buffer, dst->buffer, src->size, c);
     237          12 :     return 1;  // Success, size is in dst->size
     238             : }
     239             : 
     240             : NAMESPACE_MRF_END

Generated by: LCOV version 1.14