LCOV - code coverage report
Current view: top level - frmts/icechunk - icechunkutils.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 87 94 92.6 %
Date: 2026-06-19 21:24:00 Functions: 3 3 100.0 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  GDAL
       4             :  * Purpose:  Icechunk driver
       5             :  * Author:   Even Rouault <even dot rouault at spatialys.com>
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 2026, Even Rouault <even dot rouault at spatialys.com>
       9             :  *
      10             :  * SPDX-License-Identifier: MIT
      11             :  ****************************************************************************/
      12             : 
      13             : #include "icechunkutils.h"
      14             : #include "icechunkdrivercore.h"
      15             : 
      16             : #include "cpl_compressor.h"
      17             : 
      18             : #include <limits>
      19             : 
      20             : namespace gdal::icechunk
      21             : {
      22             : 
      23             : /************************************************************************/
      24             : /*                     GetFilenameFromDatasetName()                     */
      25             : /************************************************************************/
      26             : 
      27        8473 : std::string GetFilenameFromDatasetName(const std::string &osDatasetName,
      28             :                                        std::string &osBranchName,
      29             :                                        std::string &osTagName)
      30             : {
      31       16946 :     std::string osFilename = osDatasetName;
      32        8473 :     if (STARTS_WITH_CI(osFilename.c_str(), ICECHUNK_PREFIX))
      33             :     {
      34           8 :         osFilename = osDatasetName.substr(strlen(ICECHUNK_PREFIX));
      35           8 :         const size_t nQuestionMarkPos = osFilename.find('?');
      36           8 :         if (nQuestionMarkPos != std::string::npos)
      37             :         {
      38           5 :             std::string osSuffix = osFilename.substr(nQuestionMarkPos + 1);
      39           5 :             if (cpl::starts_with(osSuffix, "branch="))
      40             :             {
      41           2 :                 osFilename.resize(nQuestionMarkPos);
      42           2 :                 osBranchName = osSuffix.substr(strlen("branch="));
      43             :             }
      44           3 :             else if (cpl::starts_with(osSuffix, "tag="))
      45             :             {
      46           2 :                 osFilename.resize(nQuestionMarkPos);
      47           2 :                 osTagName = osSuffix.substr(strlen("tag="));
      48             :             }
      49             :             else
      50             :             {
      51           1 :                 CPLError(CE_Failure, CPLE_AppDefined,
      52             :                          "Invalid Icechunk connection string");
      53           1 :                 return {};
      54             :             }
      55             :         }
      56             :     }
      57        8472 :     return osFilename;
      58             : }
      59             : 
      60             : /************************************************************************/
      61             : /*                           DecompressFile()                           */
      62             : /************************************************************************/
      63             : 
      64             : /** Read and decompress (if needed) the specified file.
      65             :  *
      66             :  * @param pszFilename Filename. Must NOT be null.
      67             :  * @param poFile Already opened file handle. Must NOT be null.
      68             :  * @param nExpectedFileType Expected file type.
      69             :  * @param[out] pnVersion File specification version. May be null
      70             :  *
      71             :  * @return tuple (pointer to content, size) or (nullptr, 0) in case of error.
      72             :  */
      73             : std::pair<std::unique_ptr<unsigned char, VSIFreeReleaser>, size_t>
      74        6486 : DecompressFile(const char *pszFilename, VSIVirtualHandle *poFile,
      75             :                int nExpectedFileType, int *pnVersion)
      76             : {
      77        6486 :     std::pair<std::unique_ptr<unsigned char, VSIFreeReleaser>, size_t> ret{
      78             :         nullptr, 0};
      79             : 
      80        6486 :     const CPLCompressor *psZSTDDecompressor = CPLGetDecompressor("zstd");
      81        6486 :     CPLAssert(psZSTDDecompressor);
      82             : 
      83        6486 :     poFile->Seek(0, SEEK_END);
      84        6486 :     const vsi_l_offset nSize64 = poFile->Tell();
      85        6486 :     if (nSize64 < HEADER_SIZE)
      86             :     {
      87           2 :         CPLError(CE_Failure, CPLE_NotSupported, "%s: too small file",
      88             :                  pszFilename);
      89           2 :         return ret;
      90             :     }
      91        6484 :     if (nSize64 > std::numeric_limits<size_t>::max() / 2)
      92             :     {
      93           0 :         CPLError(CE_Failure, CPLE_NotSupported, "%s: too large file",
      94             :                  pszFilename);
      95           0 :         return ret;
      96             :     }
      97             : 
      98        6484 :     const size_t nSize = static_cast<size_t>(nSize64);
      99        6484 :     ret.first.reset(static_cast<unsigned char *>(VSI_MALLOC_VERBOSE(nSize)));
     100        6484 :     if (!ret.first)
     101           0 :         return ret;
     102             : 
     103        6484 :     auto *pabyRaw = ret.first.get();
     104        6484 :     if (poFile->Seek(0, SEEK_SET) != 0 || poFile->Read(pabyRaw, nSize) != nSize)
     105             :     {
     106           0 :         CPLError(CE_Failure, CPLE_FileIO, "%s: cannot ingest file",
     107             :                  pszFilename);
     108           0 :         return ret;
     109             :     }
     110             : 
     111             :     if constexpr (IS_DEBUG_BUILD)
     112             :     {
     113        6484 :         if (nExpectedFileType == FILE_TYPE_REPO_INFO)
     114             :         {
     115        4398 :             std::string osImplementationName;
     116             :             osImplementationName.assign(
     117        2199 :                 reinterpret_cast<const char *>(pabyRaw + SIG_SIZE),
     118        2199 :                 IMPLEMENTATION_NAME_SIZE);
     119        2199 :             osImplementationName.resize(strlen(osImplementationName.c_str()));
     120        2199 :             CPLDebugOnly("Icechunk", "Implementation name = '%s'",
     121             :                          osImplementationName.c_str());
     122             :         }
     123             :     }
     124             : 
     125        6484 :     const int nVersion = pabyRaw[SIG_SIZE + IMPLEMENTATION_NAME_SIZE];
     126        6484 :     if (nVersion != 1 && nVersion != 2)
     127             :     {
     128           3 :         CPLError(CE_Failure, CPLE_NotSupported,
     129             :                  "%s: Icechunk version %d not supported", pszFilename,
     130             :                  nVersion);
     131           3 :         return ret;
     132             :     }
     133        6481 :     if (pnVersion)
     134        6481 :         *pnVersion = nVersion;
     135             : 
     136        6481 :     const int nFileType =
     137        6481 :         pabyRaw[SIG_SIZE + IMPLEMENTATION_NAME_SIZE + SPEC_VERSION_SIZE];
     138        6481 :     if (nFileType != nExpectedFileType)
     139             :     {
     140           1 :         CPLError(CE_Failure, CPLE_NotSupported,
     141             :                  "%s: Got file type %d, expected %d", pszFilename, nFileType,
     142             :                  nExpectedFileType);
     143           1 :         return ret;
     144             :     }
     145             : 
     146        6480 :     const int nCompressionAlgo = pabyRaw[SIG_SIZE + IMPLEMENTATION_NAME_SIZE +
     147        6480 :                                          SPEC_VERSION_SIZE + FILE_TYPE_SIZE];
     148        6480 :     if (nCompressionAlgo != COMPRESSION_ALGO_NONE &&
     149             :         nCompressionAlgo != COMPRESSION_ALGO_ZSTD)
     150             :     {
     151           0 :         CPLError(CE_Failure, CPLE_NotSupported,
     152             :                  "%s: Icechunk compression algorithm %d not supported",
     153             :                  pszFilename, nCompressionAlgo);
     154           0 :         return ret;
     155             :     }
     156             : 
     157        6480 :     const auto *pabyRawPastHeader = pabyRaw + HEADER_SIZE;
     158        6480 :     const auto nSizePastHeader = nSize - HEADER_SIZE;
     159             : 
     160        6480 :     if (nCompressionAlgo == COMPRESSION_ALGO_ZSTD)
     161             :     {
     162        6328 :         size_t nUncompressedSize = 0;
     163        6328 :         void *pabyUncompressed = nullptr;
     164        6328 :         if (!psZSTDDecompressor->pfnFunc(
     165             :                 pabyRawPastHeader, nSizePastHeader, &pabyUncompressed,
     166        6328 :                 &nUncompressedSize, nullptr, psZSTDDecompressor->user_data))
     167             :         {
     168           2 :             CPLError(CE_Failure, CPLE_AppDefined,
     169             :                      "%s: ZSTD decompression failed", pszFilename);
     170           2 :             ret.first.reset();
     171           2 :             return ret;
     172             :         }
     173             : 
     174        6326 :         ret.first.reset(static_cast<unsigned char *>(pabyUncompressed));
     175        6326 :         ret.second = nUncompressedSize;
     176             :     }
     177             :     else
     178             :     {
     179         152 :         memmove(pabyRaw, pabyRawPastHeader, nSizePastHeader);
     180         152 :         ret.second = nSizePastHeader;
     181             :     }
     182             : 
     183        6478 :     return ret;
     184             : }
     185             : 
     186             : /************************************************************************/
     187             : /*                       CrockfordBase32Encode()                        */
     188             : /************************************************************************/
     189             : 
     190             : /** Encode the provided binary buffer as a Crockford Base32 string.
     191             :  *
     192             :  * Cf https://www.crockford.com/base32.html
     193             :  */
     194       23838 : std::string CrockfordBase32Encode(const uint8_t *data, size_t size)
     195             : {
     196       23838 :     std::string ret;
     197             :     // Omit I, L, O and U
     198       23838 :     constexpr char szDict[] = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";
     199             :     static_assert(sizeof(szDict) - 1 == 32);
     200       23838 :     size_t i = 0;
     201       23838 :     unsigned currentVal = 0;
     202       23838 :     unsigned currentBitsCount = 0;
     203       23838 :     constexpr unsigned SYMBOL_BITS = 5;
     204             :     while (true)
     205             :     {
     206      461580 :         if (currentBitsCount < SYMBOL_BITS)
     207             :         {
     208             :             // Extra iteration when i == size is intentional
     209      307720 :             if (i > size)
     210       23838 :                 break;
     211      283882 :             currentVal = (currentVal << 8) | (i < size ? data[i] : 0);
     212      283882 :             ++i;
     213      283882 :             currentBitsCount += 8;
     214             :         }
     215      437742 :         const unsigned int rightShift = currentBitsCount - SYMBOL_BITS;
     216      437742 :         const unsigned dictIdx = currentVal >> rightShift;
     217      437742 :         CPLAssert(dictIdx < 32);
     218      437742 :         ret += szDict[dictIdx];
     219             :         // Zero out the 5 left-most valid bits (that we just consumed)
     220      437742 :         currentVal &= ~(31U << rightShift);
     221      437742 :         currentBitsCount -= SYMBOL_BITS;
     222      437742 :     }
     223       47676 :     return ret;
     224             : }
     225             : 
     226             : }  // namespace gdal::icechunk

Generated by: LCOV version 1.14