LCOV - code coverage report
Current view: top level - frmts/zarr - zarr_v3_codec_sharding.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 333 397 83.9 %
Date: 2026-02-11 08:43:47 Functions: 9 10 90.0 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  GDAL
       4             :  * Purpose:  Zarr driver, "sharding_indexed" codec
       5             :  * Author:   Even Rouault <even dot rouault at spatialys.com>
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 2026, Development Seed
       9             :  *
      10             :  * SPDX-License-Identifier: MIT
      11             :  ****************************************************************************/
      12             : 
      13             : #include "zarr_v3_codec.h"
      14             : 
      15             : #include "cpl_vsi_virtual.h"
      16             : 
      17             : #include <algorithm>
      18             : #include <cinttypes>
      19             : #include <limits>
      20             : 
      21             : // Implements https://zarr-specs.readthedocs.io/en/latest/v3/codecs/sharding-indexed/index.html
      22             : 
      23             : /************************************************************************/
      24             : /*                     ZarrV3CodecShardingIndexed()                     */
      25             : /************************************************************************/
      26             : 
      27         660 : ZarrV3CodecShardingIndexed::ZarrV3CodecShardingIndexed() : ZarrV3Codec(NAME)
      28             : {
      29         660 : }
      30             : 
      31             : /************************************************************************/
      32             : /*                 ZarrV3CodecShardingIndexed::Clone()                  */
      33             : /************************************************************************/
      34             : 
      35           4 : std::unique_ptr<ZarrV3Codec> ZarrV3CodecShardingIndexed::Clone() const
      36             : {
      37           8 :     auto psClone = std::make_unique<ZarrV3CodecShardingIndexed>();
      38           8 :     ZarrArrayMetadata oOutputArrayMetadata;
      39           4 :     psClone->InitFromConfiguration(m_oConfiguration, m_oInputArrayMetadata,
      40             :                                    oOutputArrayMetadata,
      41             :                                    /* bEmitWarnings = */ false);
      42           8 :     return psClone;
      43             : }
      44             : 
      45             : /************************************************************************/
      46             : /*         ZarrV3CodecShardingIndexed::InitFromConfiguration()          */
      47             : /************************************************************************/
      48             : 
      49         660 : bool ZarrV3CodecShardingIndexed::InitFromConfiguration(
      50             :     const CPLJSONObject &configuration,
      51             :     const ZarrArrayMetadata &oInputArrayMetadata,
      52             :     ZarrArrayMetadata &oOutputArrayMetadata, bool bEmitWarnings)
      53             : {
      54         660 :     if (oInputArrayMetadata.anBlockSizes.empty())
      55             :     {
      56           0 :         CPLError(
      57             :             CE_Failure, CPLE_AppDefined,
      58             :             "Codec sharding_indexed: sharding not supported for scalar array");
      59           0 :         return false;
      60             :     }
      61             : 
      62         660 :     m_oConfiguration = configuration.Clone();
      63         660 :     m_oInputArrayMetadata = oInputArrayMetadata;
      64             : 
      65        1319 :     if (!configuration.IsValid() ||
      66         659 :         configuration.GetType() != CPLJSONObject::Type::Object)
      67             :     {
      68           2 :         CPLError(
      69             :             CE_Failure, CPLE_AppDefined,
      70             :             "Codec sharding_indexed: configuration missing or not an object");
      71           2 :         return false;
      72             :     }
      73             : 
      74        1974 :     const auto oChunkShape = configuration["chunk_shape"].ToArray();
      75        1314 :     if (!oChunkShape.IsValid() ||
      76         656 :         oChunkShape.GetType() != CPLJSONObject::Type::Array)
      77             :     {
      78           2 :         CPLError(CE_Failure, CPLE_AppDefined,
      79             :                  "Codec sharding_indexed: configuration.chunk_shape missing or "
      80             :                  "not an array");
      81           2 :         return false;
      82             :     }
      83        1312 :     if (static_cast<size_t>(oChunkShape.Size()) !=
      84         656 :         m_oInputArrayMetadata.anBlockSizes.size())
      85             :     {
      86           1 :         CPLError(CE_Failure, CPLE_AppDefined,
      87             :                  "Codec sharding_indexed: configuration.chunk_shape should "
      88             :                  "have the same shape as the array");
      89           1 :         return false;
      90             :     }
      91        1310 :     std::vector<size_t> anCountInnerChunks;
      92        1959 :     for (int i = 0; i < oChunkShape.Size(); ++i)
      93             :     {
      94        2614 :         if (oChunkShape[i].GetType() != CPLJSONObject::Type::Integer &&
      95        1307 :             oChunkShape[i].GetType() != CPLJSONObject::Type::Long)
      96             :         {
      97           0 :             CPLError(CE_Failure, CPLE_AppDefined,
      98             :                      "Codec sharding_indexed: configuration.chunk_shape[%d] "
      99             :                      "should be an integer",
     100             :                      i);
     101           0 :             return false;
     102             :         }
     103        1307 :         const int64_t nVal = oChunkShape[i].ToLong();
     104        2612 :         if (nVal <= 0 ||
     105        1305 :             static_cast<uint64_t>(nVal) >
     106        2612 :                 m_oInputArrayMetadata.anBlockSizes[i] ||
     107        1305 :             (m_oInputArrayMetadata.anBlockSizes[i] % nVal) != 0)
     108             :         {
     109           3 :             CPLError(
     110             :                 CE_Failure, CPLE_AppDefined,
     111             :                 "Codec sharding_indexed: configuration.chunk_shape[%d]=%" PRId64
     112             :                 " should be a strictly positive value that is a divisor of "
     113             :                 "%" PRIu64,
     114             :                 i, nVal,
     115           3 :                 static_cast<uint64_t>(m_oInputArrayMetadata.anBlockSizes[i]));
     116           3 :             return false;
     117             :         }
     118             :         // The following cast is safe since ZarrArray::ParseChunkSize() has
     119             :         // previously validated that m_oInputArrayMetadata.anBlockSizes[i] fits
     120             :         // on size_t
     121             :         if constexpr (sizeof(size_t) < sizeof(uint64_t))
     122             :         {
     123             :             // coverity[result_independent_of_operands]
     124             :             CPLAssert(nVal <= std::numeric_limits<size_t>::max());
     125             :         }
     126        1304 :         m_anInnerBlockSize.push_back(static_cast<size_t>(nVal));
     127        1304 :         anCountInnerChunks.push_back(
     128        1304 :             static_cast<size_t>(m_oInputArrayMetadata.anBlockSizes[i] / nVal));
     129             :     }
     130             : 
     131        1956 :     const auto oCodecs = configuration["codecs"];
     132         652 :     if (!oCodecs.IsValid() || oCodecs.GetType() != CPLJSONObject::Type::Array)
     133             :     {
     134           2 :         CPLError(CE_Failure, CPLE_AppDefined,
     135             :                  "Codec sharding_indexed: configuration.codecs missing or "
     136             :                  "not an array");
     137           2 :         return false;
     138             :     }
     139         650 :     if (oCodecs.ToArray().Size() == 0)
     140             :     {
     141           1 :         CPLError(CE_Failure, CPLE_AppDefined,
     142             :                  "Codec sharding_indexed: configuration.codecs[] is empty");
     143           1 :         return false;
     144             :     }
     145        1298 :     ZarrArrayMetadata inputArrayMetadataCodecs = m_oInputArrayMetadata;
     146         649 :     inputArrayMetadataCodecs.anBlockSizes = m_anInnerBlockSize;
     147             :     m_poCodecSequence =
     148         649 :         std::make_unique<ZarrV3CodecSequence>(inputArrayMetadataCodecs);
     149         649 :     if (!m_poCodecSequence->InitFromJson(oCodecs, oOutputArrayMetadata))
     150             :     {
     151           1 :         CPLError(CE_Failure, CPLE_AppDefined,
     152             :                  "Codec sharding_indexed: initialization of codecs failed");
     153           1 :         return false;
     154             :     }
     155             : 
     156         648 :     if (bEmitWarnings && m_poCodecSequence->SupportsPartialDecoding())
     157             :     {
     158             :         // Implementation limitation
     159           1 :         CPLError(CE_Warning, CPLE_AppDefined,
     160             :                  "Nested sharding detected. For now, partial decoding is only "
     161             :                  "implemented on the outer-most shard level");
     162             :     }
     163             : 
     164        1944 :     const auto oIndexCodecs = configuration["index_codecs"];
     165        1295 :     if (!oIndexCodecs.IsValid() ||
     166         647 :         oIndexCodecs.GetType() != CPLJSONObject::Type::Array)
     167             :     {
     168           2 :         CPLError(
     169             :             CE_Failure, CPLE_AppDefined,
     170             :             "Codec sharding_indexed: configuration.index_codecs missing or "
     171             :             "not an array");
     172           2 :         return false;
     173             :     }
     174         646 :     if (oIndexCodecs.ToArray().Size() == 0)
     175             :     {
     176           1 :         CPLError(
     177             :             CE_Failure, CPLE_AppDefined,
     178             :             "Codec sharding_indexed: configuration.index_codecs[] is empty");
     179           1 :         return false;
     180             :     }
     181        1290 :     ZarrArrayMetadata inputArrayMetadataIndex;
     182         645 :     inputArrayMetadataIndex.oElt.nativeType =
     183             :         DtypeElt::NativeType::UNSIGNED_INT;
     184         645 :     inputArrayMetadataIndex.oElt.nativeSize = sizeof(uint64_t);
     185             :     inputArrayMetadataIndex.oElt.gdalType =
     186         645 :         GDALExtendedDataType::Create(GDT_UInt64);
     187         645 :     inputArrayMetadataIndex.oElt.gdalSize = sizeof(uint64_t);
     188         645 :     inputArrayMetadataIndex.anBlockSizes = std::move(anCountInnerChunks);
     189             :     // 2 for offset and size
     190         645 :     inputArrayMetadataIndex.anBlockSizes.push_back(2);
     191             :     m_poIndexCodecSequence =
     192         645 :         std::make_unique<ZarrV3CodecSequence>(inputArrayMetadataIndex);
     193        1290 :     ZarrArrayMetadata oOutputArrayMetadataIndex;
     194         645 :     if (!m_poIndexCodecSequence->InitFromJson(oIndexCodecs,
     195             :                                               oOutputArrayMetadataIndex))
     196             :     {
     197           1 :         CPLError(
     198             :             CE_Failure, CPLE_AppDefined,
     199             :             "Codec sharding_indexed: initialization of index_codecs failed");
     200           1 :         return false;
     201             :     }
     202         644 :     const auto &indexCodecs = m_poIndexCodecSequence->GetCodecs();
     203         644 :     if (indexCodecs.empty())
     204             :     {
     205             :         // ok, there is only a "bytes" codec, optimized away if the order
     206             :         // is the one of the native architecture
     207             :     }
     208        1002 :     else if (indexCodecs[0]->GetName() == ZarrV3CodecBytes::NAME ||
     209         500 :              indexCodecs[0]->GetName() == ZarrV3CodecCRC32C::NAME)
     210             :     {
     211             :         // ok
     212             :     }
     213           1 :     else if (indexCodecs.size() == 2 &&
     214           0 :              indexCodecs[1]->GetName() == ZarrV3CodecCRC32C::NAME)
     215             :     {
     216             :         // ok
     217             :     }
     218             :     else
     219             :     {
     220           1 :         CPLError(CE_Failure, CPLE_NotSupported,
     221             :                  "Codec sharding_indexed: this implementation only supports "
     222             :                  "Bytes, possibly followed by CRC32C, as index_codecs");
     223           1 :         return false;
     224             :     }
     225         643 :     m_bIndexHasCRC32 = (!indexCodecs.empty() && indexCodecs.back()->GetName() ==
     226             :                                                     ZarrV3CodecCRC32C::NAME);
     227             : 
     228             :     const std::string osIndexLocation =
     229        1929 :         configuration.GetString("index_location", "end");
     230         643 :     if (osIndexLocation != "start" && osIndexLocation != "end")
     231             :     {
     232           1 :         CPLError(CE_Failure, CPLE_AppDefined,
     233             :                  "Codec sharding_indexed: invalid value for index_location");
     234           1 :         return false;
     235             :     }
     236         642 :     m_bIndexLocationAtEnd = (osIndexLocation == "end");
     237             : 
     238         642 :     return true;
     239             : }
     240             : 
     241             : /************************************************************************/
     242             : /*                 ZarrV3CodecShardingIndexed::Encode()                 */
     243             : /************************************************************************/
     244             : 
     245           0 : bool ZarrV3CodecShardingIndexed::Encode(const ZarrByteVectorQuickResize &,
     246             :                                         ZarrByteVectorQuickResize &) const
     247             : {
     248           0 :     CPLError(CE_Failure, CPLE_NotSupported,
     249             :              "ZarrV3CodecShardingIndexed::Encode() not supported");
     250           0 :     return false;
     251             : }
     252             : 
     253             : /************************************************************************/
     254             : /*                     CopySubArrayIntoLargerOne()                      */
     255             : /************************************************************************/
     256             : 
     257             : static void
     258         144 : CopySubArrayIntoLargerOne(const ZarrByteVectorQuickResize &abyChunk,
     259             :                           const std::vector<size_t> &anInnerBlockSize,
     260             :                           const std::vector<size_t> &anInnerBlockIndices,
     261             :                           ZarrByteVectorQuickResize &abyDst,
     262             :                           const std::vector<size_t> &anDstBlockSize,
     263             :                           const size_t nDTSize)
     264             : {
     265         144 :     const auto nDims = anInnerBlockSize.size();
     266         144 :     CPLAssert(nDims > 0);
     267         144 :     CPLAssert(nDims == anInnerBlockIndices.size());
     268         144 :     CPLAssert(nDims == anDstBlockSize.size());
     269             :     // +1 just to make some gcc versions not emit -Wnull-dereference false positives
     270         288 :     std::vector<GByte *> dstPtrStack(nDims + 1);
     271         288 :     std::vector<size_t> count(nDims + 1);
     272         288 :     std::vector<size_t> dstStride(nDims + 1);
     273             : 
     274         144 :     size_t nDstStride = nDTSize;
     275         432 :     for (size_t iDim = nDims; iDim > 0;)
     276             :     {
     277         288 :         --iDim;
     278         288 :         dstStride[iDim] = nDstStride;
     279         288 :         nDstStride *= anDstBlockSize[iDim];
     280             :     }
     281             : 
     282         144 :     dstPtrStack[0] = abyDst.data();
     283         432 :     for (size_t iDim = 0; iDim < nDims; ++iDim)
     284             :     {
     285         288 :         CPLAssert((anInnerBlockIndices[iDim] + 1) * anInnerBlockSize[iDim] <=
     286             :                   anDstBlockSize[iDim]);
     287         576 :         dstPtrStack[0] += anInnerBlockIndices[iDim] * anInnerBlockSize[iDim] *
     288         288 :                           dstStride[iDim];
     289             :     }
     290         144 :     const GByte *pabySrc = abyChunk.data();
     291             : 
     292         144 :     const size_t nLastDimSize = anInnerBlockSize.back() * nDTSize;
     293         144 :     size_t dimIdx = 0;
     294         288 : lbl_next_depth:
     295         288 :     if (dimIdx + 1 == nDims)
     296             :     {
     297         144 :         memcpy(dstPtrStack[dimIdx], pabySrc, nLastDimSize);
     298         144 :         pabySrc += nLastDimSize;
     299             :     }
     300             :     else
     301             :     {
     302         144 :         count[dimIdx] = anInnerBlockSize[dimIdx];
     303             :         while (true)
     304             :         {
     305         144 :             dimIdx++;
     306         144 :             dstPtrStack[dimIdx] = dstPtrStack[dimIdx - 1];
     307         144 :             goto lbl_next_depth;
     308         144 :         lbl_return_to_caller:
     309         144 :             dimIdx--;
     310         144 :             if (--count[dimIdx] == 0)
     311         144 :                 break;
     312           0 :             dstPtrStack[dimIdx] += dstStride[dimIdx];
     313             :         }
     314             :     }
     315         288 :     if (dimIdx > 0)
     316         144 :         goto lbl_return_to_caller;
     317         144 : }
     318             : 
     319             : /************************************************************************/
     320             : /*                           FillWithNoData()                           */
     321             : /************************************************************************/
     322             : 
     323         150 : static void FillWithNoData(ZarrByteVectorQuickResize &abyDst,
     324             :                            const size_t nCount,
     325             :                            const ZarrArrayMetadata &metadata)
     326             : {
     327         150 :     const size_t nDTSize = metadata.oElt.nativeSize;
     328         300 :     if (metadata.abyNoData.empty() ||
     329         300 :         metadata.abyNoData == std::vector<GByte>(nDTSize, 0))
     330             :     {
     331         150 :         memset(abyDst.data(), 0, nDTSize * nCount);
     332             :     }
     333             :     else
     334             :     {
     335           0 :         CPLAssert(metadata.abyNoData.size() == nDTSize);
     336           0 :         for (size_t i = 0; i < nCount; ++i)
     337             :         {
     338           0 :             memcpy(abyDst.data() + i * nDTSize, metadata.abyNoData.data(),
     339             :                    nDTSize);
     340             :         }
     341             :     }
     342         150 : }
     343             : 
     344             : /************************************************************************/
     345             : /*                 ZarrV3CodecShardingIndexed::Decode()                 */
     346             : /************************************************************************/
     347             : 
     348         146 : bool ZarrV3CodecShardingIndexed::Decode(const ZarrByteVectorQuickResize &abySrc,
     349             :                                         ZarrByteVectorQuickResize &abyDst) const
     350             : {
     351         146 :     size_t nInnerChunks = 1;
     352         438 :     for (size_t i = 0; i < m_anInnerBlockSize.size(); ++i)
     353             :     {
     354             :         const size_t nCountInnerChunksThisdim =
     355         292 :             m_oInputArrayMetadata.anBlockSizes[i] / m_anInnerBlockSize[i];
     356         292 :         nInnerChunks *= nCountInnerChunksThisdim;
     357             :     }
     358             : 
     359         292 :     const size_t nIndexEncodedSize = nInnerChunks * sizeof(Location) +
     360         146 :                                      (m_bIndexHasCRC32 ? sizeof(uint32_t) : 0);
     361         292 :     ZarrByteVectorQuickResize abyIndex;
     362         146 :     if (m_bIndexLocationAtEnd)
     363             :     {
     364         146 :         abyIndex.insert(abyIndex.end(),
     365         146 :                         abySrc.begin() + (abySrc.size() - nIndexEncodedSize),
     366         438 :                         abySrc.end());
     367             :     }
     368             :     else
     369             :     {
     370           0 :         abyIndex.insert(abyIndex.end(), abySrc.begin(),
     371           0 :                         abySrc.end() + nIndexEncodedSize);
     372             :     }
     373             : 
     374         146 :     if (!m_poIndexCodecSequence->Decode(abyIndex))
     375             :     {
     376           0 :         CPLError(
     377             :             CE_Failure, CPLE_NotSupported,
     378             :             "ZarrV3CodecShardingIndexed::Decode(): cannot decode shard index");
     379           0 :         return false;
     380             :     }
     381             : 
     382         146 :     if (abyIndex.size() != nInnerChunks * sizeof(Location))
     383             :     {
     384           0 :         CPLError(CE_Failure, CPLE_NotSupported,
     385             :                  "ZarrV3CodecShardingIndexed::Decode(): shard index has not "
     386             :                  "expected size");
     387           0 :         return false;
     388             :     }
     389             : 
     390             :     const Location *panLocations =
     391         146 :         reinterpret_cast<const Location *>(abyIndex.data());
     392             : 
     393         292 :     ZarrByteVectorQuickResize abyChunk;
     394         146 :     const auto nDTSize = m_oInputArrayMetadata.oElt.nativeSize;
     395             :     const size_t nExpectedDecodedChunkSize =
     396         146 :         nDTSize * MultiplyElements(m_anInnerBlockSize);
     397             :     const size_t nDstCount =
     398         146 :         MultiplyElements(m_oInputArrayMetadata.anBlockSizes);
     399             : 
     400             :     try
     401             :     {
     402         146 :         abyDst.resize(nDstCount * nDTSize);
     403             :     }
     404           0 :     catch (const std::exception &)
     405             :     {
     406           0 :         CPLError(CE_Failure, CPLE_OutOfMemory,
     407             :                  "Cannot allocate memory for decoded shard");
     408           0 :         return false;
     409             :     }
     410             : 
     411         146 :     FillWithNoData(abyDst, nDstCount, m_oInputArrayMetadata);
     412             : 
     413         292 :     std::vector<size_t> anChunkIndices(m_anInnerBlockSize.size(), 0);
     414         380 :     for (size_t iChunk = 0; iChunk < nInnerChunks; ++iChunk)
     415             :     {
     416         364 :         if (iChunk > 0)
     417             :         {
     418             :             // Update chunk coordinates
     419         218 :             size_t iDim = m_anInnerBlockSize.size() - 1;
     420         299 :             while (++anChunkIndices[iDim] ==
     421         598 :                    m_oInputArrayMetadata.anBlockSizes[iDim] /
     422         299 :                        m_anInnerBlockSize[iDim])
     423             :             {
     424          81 :                 anChunkIndices[iDim] = 0;
     425          81 :                 --iDim;
     426             :             }
     427             :         }
     428             : 
     429             : #ifdef DEBUG_VERBOSE
     430             :         CPLDebug("ZARR", "Chunk %" PRIu64 ": offset %" PRIu64 ", size %" PRIu64,
     431             :                  static_cast<uint64_t>(iChunk), panLocations[iChunk].nOffset,
     432             :                  panLocations[iChunk].nSize);
     433             : #endif
     434             : 
     435         818 :         if (panLocations[iChunk].nOffset ==
     436         474 :                 std::numeric_limits<uint64_t>::max() &&
     437         110 :             panLocations[iChunk].nSize == std::numeric_limits<uint64_t>::max())
     438             :         {
     439             :             // Empty chunk
     440          90 :             continue;
     441             :         }
     442             : 
     443         488 :         if (panLocations[iChunk].nOffset >= abySrc.size() ||
     444         214 :             panLocations[iChunk].nSize >
     445         214 :                 abySrc.size() - panLocations[iChunk].nOffset)
     446             :         {
     447          80 :             CPLError(CE_Failure, CPLE_NotSupported,
     448             :                      "ZarrV3CodecShardingIndexed::Decode(): invalid chunk "
     449             :                      "location for chunk %" PRIu64 ": offset=%" PRIu64
     450             :                      ", size=%" PRIu64,
     451             :                      static_cast<uint64_t>(iChunk),
     452          80 :                      panLocations[iChunk].nOffset, panLocations[iChunk].nSize);
     453          80 :             return false;
     454             :         }
     455             : 
     456         194 :         abyChunk.clear();
     457             :         abyChunk.insert(
     458         194 :             abyChunk.end(),
     459           0 :             abySrc.begin() + static_cast<size_t>(panLocations[iChunk].nOffset),
     460           0 :             abySrc.begin() + static_cast<size_t>(panLocations[iChunk].nOffset +
     461         388 :                                                  panLocations[iChunk].nSize));
     462         194 :         if (!m_poCodecSequence->Decode(abyChunk))
     463             :         {
     464          50 :             CPLError(CE_Failure, CPLE_NotSupported,
     465             :                      "ZarrV3CodecShardingIndexed::Decode(): cannot decode "
     466             :                      "chunk %" PRIu64,
     467             :                      static_cast<uint64_t>(iChunk));
     468          50 :             return false;
     469             :         }
     470             : 
     471         144 :         if (abyChunk.size() != nExpectedDecodedChunkSize)
     472             :         {
     473           0 :             CPLError(CE_Failure, CPLE_NotSupported,
     474             :                      "ZarrV3CodecShardingIndexed::Decode(): decoded size for "
     475             :                      "chunk %" PRIu64 " is %" PRIu64 " whereas %" PRIu64
     476             :                      " is expected",
     477             :                      static_cast<uint64_t>(iChunk),
     478           0 :                      static_cast<uint64_t>(abyChunk.size()),
     479             :                      static_cast<uint64_t>(nExpectedDecodedChunkSize));
     480           0 :             return false;
     481             :         }
     482             : 
     483         144 :         CopySubArrayIntoLargerOne(abyChunk, m_anInnerBlockSize, anChunkIndices,
     484         144 :                                   abyDst, m_oInputArrayMetadata.anBlockSizes,
     485             :                                   nDTSize);
     486             :     }
     487             : 
     488          16 :     return true;
     489             : }
     490             : 
     491             : /************************************************************************/
     492             : /*             ZarrV3CodecShardingIndexed::DecodePartial()              */
     493             : /************************************************************************/
     494             : 
     495        1156 : bool ZarrV3CodecShardingIndexed::DecodePartial(
     496             :     VSIVirtualHandle *poFile, const ZarrByteVectorQuickResize & /* abySrc */,
     497             :     ZarrByteVectorQuickResize &abyDst, std::vector<size_t> &anStartIdx,
     498             :     std::vector<size_t> &anCount)
     499             : {
     500        1156 :     CPLAssert(anStartIdx.size() == m_oInputArrayMetadata.anBlockSizes.size());
     501        1156 :     CPLAssert(anStartIdx.size() == anCount.size());
     502             : 
     503        1156 :     size_t nInnerChunkCount = 1;
     504        1156 :     size_t nInnerChunkIdx = 0;
     505        1156 :     size_t nInnerChunkCountPrevDim = 1;
     506        3468 :     for (size_t i = 0; i < anStartIdx.size(); ++i)
     507             :     {
     508        2312 :         CPLAssert(anStartIdx[i] + anCount[i] <=
     509             :                   m_oInputArrayMetadata.anBlockSizes[i]);
     510        4624 :         if ((anStartIdx[i] % m_anInnerBlockSize[i]) != 0 ||
     511        2312 :             anCount[i] != m_anInnerBlockSize[i])
     512             :         {
     513             :             // Should not happen with the current call sites.
     514           0 :             CPLError(CE_Failure, CPLE_AppDefined,
     515             :                      "ZarrV3CodecShardingIndexed::DecodePartial() only "
     516             :                      "supported on an exact inner chunk");
     517           0 :             return false;
     518             :         }
     519             : 
     520             :         const size_t nCountInnerChunksThisDim =
     521        2312 :             m_oInputArrayMetadata.anBlockSizes[i] / m_anInnerBlockSize[i];
     522        2312 :         nInnerChunkIdx *= nInnerChunkCountPrevDim;
     523        2312 :         nInnerChunkIdx += anStartIdx[i] / m_anInnerBlockSize[i];
     524        2312 :         nInnerChunkCount *= nCountInnerChunksThisDim;
     525        2312 :         nInnerChunkCountPrevDim = nCountInnerChunksThisDim;
     526             :     }
     527             : 
     528        1156 :     abyDst.clear();
     529             : 
     530        1156 :     const auto nDTSize = m_oInputArrayMetadata.oElt.nativeSize;
     531        1156 :     const auto nExpectedDecodedChunkSize = nDTSize * MultiplyElements(anCount);
     532             : 
     533        1156 :     vsi_l_offset nLocationOffset =
     534             :         static_cast<vsi_l_offset>(nInnerChunkIdx) * sizeof(Location);
     535        1156 :     if (m_bIndexLocationAtEnd)
     536             :     {
     537        1156 :         poFile->Seek(0, SEEK_END);
     538        1156 :         const auto nFileSize = poFile->Tell();
     539        1156 :         vsi_l_offset nIndexSize =
     540             :             static_cast<vsi_l_offset>(nInnerChunkCount) * sizeof(Location);
     541        1156 :         if (m_bIndexHasCRC32)
     542        1153 :             nIndexSize += sizeof(uint32_t);
     543        1156 :         if (nFileSize < nIndexSize)
     544             :         {
     545           0 :             CPLError(CE_Failure, CPLE_AppDefined,
     546             :                      "ZarrV3CodecShardingIndexed::DecodePartial(): shard file "
     547             :                      "too small");
     548           0 :             return false;
     549             :         }
     550        1156 :         nLocationOffset += nFileSize - nIndexSize;
     551             :     }
     552             : 
     553             :     Location loc;
     554        2312 :     if (poFile->Seek(nLocationOffset, SEEK_SET) != 0 ||
     555        1156 :         poFile->Read(&loc, 1, sizeof(loc)) != sizeof(loc))
     556             :     {
     557             : 
     558           0 :         CPLError(CE_Failure, CPLE_AppDefined,
     559             :                  "ZarrV3CodecShardingIndexed::DecodePartial(): "
     560             :                  "cannot read index for chunk %" PRIu64,
     561             :                  static_cast<uint64_t>(nInnerChunkIdx));
     562           0 :         return false;
     563             :     }
     564             : 
     565        2309 :     if (!m_poIndexCodecSequence->GetCodecs().empty() &&
     566        1153 :         m_poIndexCodecSequence->GetCodecs().front()->GetName() ==
     567        2309 :             ZarrV3CodecBytes::NAME &&
     568           0 :         !m_poIndexCodecSequence->GetCodecs().front()->IsNoOp())
     569             :     {
     570           0 :         CPL_SWAP64PTR(&(loc.nOffset));
     571           0 :         CPL_SWAP64PTR(&(loc.nSize));
     572             :     }
     573             : 
     574        1160 :     if (loc.nOffset == std::numeric_limits<uint64_t>::max() &&
     575           4 :         loc.nSize == std::numeric_limits<uint64_t>::max())
     576             :     {
     577             :         // Empty chunk
     578             :         try
     579             :         {
     580           4 :             abyDst.resize(nExpectedDecodedChunkSize);
     581             :         }
     582           0 :         catch (const std::exception &)
     583             :         {
     584           0 :             CPLError(CE_Failure, CPLE_OutOfMemory,
     585             :                      "Cannot allocate memory for decoded shard");
     586           0 :             return false;
     587             :         }
     588           4 :         FillWithNoData(abyDst, MultiplyElements(anCount),
     589           4 :                        m_oInputArrayMetadata);
     590           4 :         return true;
     591             :     }
     592             : 
     593        1152 :     constexpr size_t THRESHOLD = 10 * 1024 * 1024;
     594        1152 :     if (loc.nSize > THRESHOLD)
     595             :     {
     596             :         // When the chunk size is above a certain threshold, check it against
     597             :         // the actual file size to avoid excessive memory allocation attempts.
     598             : 
     599          32 :         poFile->Seek(0, SEEK_END);
     600          32 :         const auto nFileSize = poFile->Tell();
     601             : 
     602          32 :         if (loc.nOffset >= nFileSize || loc.nSize > nFileSize - loc.nOffset)
     603             :         {
     604          32 :             CPLError(
     605             :                 CE_Failure, CPLE_NotSupported,
     606             :                 "ZarrV3CodecShardingIndexed::DecodePartial(): invalid chunk "
     607             :                 "location for chunk %" PRIu64 ": offset=%" PRIu64
     608             :                 ", size=%" PRIu64,
     609             :                 static_cast<uint64_t>(nInnerChunkIdx), loc.nOffset, loc.nSize);
     610          32 :             return false;
     611             :         }
     612             :     }
     613             : 
     614             :     if constexpr (sizeof(size_t) < sizeof(uint64_t))
     615             :     {
     616             :         // coverity[result_independent_of_operands]
     617             :         if (loc.nSize > std::numeric_limits<size_t>::max())
     618             :         {
     619             :             CPLError(
     620             :                 CE_Failure, CPLE_NotSupported,
     621             :                 "ZarrV3CodecShardingIndexed::DecodePartial(): too large chunk "
     622             :                 "size for chunk %" PRIu64 " for this platform: size=%" PRIu64,
     623             :                 static_cast<uint64_t>(nInnerChunkIdx), loc.nSize);
     624             :             return false;
     625             :         }
     626             :     }
     627             : 
     628             :     try
     629             :     {
     630        1120 :         abyDst.resize(static_cast<size_t>(loc.nSize));
     631             :     }
     632           0 :     catch (const std::exception &)
     633             :     {
     634           0 :         CPLError(CE_Failure, CPLE_OutOfMemory,
     635             :                  "Cannot allocate memory for decoded shard");
     636           0 :         return false;
     637             :     }
     638             : 
     639        2240 :     if (poFile->Seek(loc.nOffset, SEEK_SET) != 0 ||
     640        1120 :         poFile->Read(abyDst.data(), 1, abyDst.size()) != abyDst.size())
     641             :     {
     642          41 :         CPLError(CE_Failure, CPLE_NotSupported,
     643             :                  "ZarrV3CodecShardingIndexed::DecodePartial(): cannot read "
     644             :                  "data for chunk %" PRIu64 ": offset=%" PRIu64
     645             :                  ", size=%" PRIu64,
     646             :                  static_cast<uint64_t>(nInnerChunkIdx), loc.nOffset, loc.nSize);
     647          41 :         return false;
     648             :     }
     649             : 
     650        1079 :     if (!m_poCodecSequence->Decode(abyDst))
     651             :     {
     652         363 :         CPLError(CE_Failure, CPLE_NotSupported,
     653             :                  "ZarrV3CodecShardingIndexed::DecodePartial(): cannot decode "
     654             :                  "chunk %" PRIu64,
     655             :                  static_cast<uint64_t>(nInnerChunkIdx));
     656         363 :         return false;
     657             :     }
     658             : 
     659         716 :     if (abyDst.size() != nExpectedDecodedChunkSize)
     660             :     {
     661           0 :         CPLError(
     662             :             CE_Failure, CPLE_NotSupported,
     663             :             "ZarrV3CodecShardingIndexed::DecodePartial(): decoded size for "
     664             :             "chunk %" PRIu64 " is %" PRIu64 " whereas %" PRIu64 " is expected",
     665             :             static_cast<uint64_t>(nInnerChunkIdx),
     666           0 :             static_cast<uint64_t>(abyDst.size()),
     667             :             static_cast<uint64_t>(nExpectedDecodedChunkSize));
     668           0 :         return false;
     669             :     }
     670             : 
     671         716 :     return true;
     672             : }
     673             : 
     674             : /************************************************************************/
     675             : /*           ZarrV3CodecShardingIndexed::BatchDecodePartial()           */
     676             : /************************************************************************/
     677             : 
     678        3940 : bool ZarrV3CodecShardingIndexed::BatchDecodePartial(
     679             :     VSIVirtualHandle *poFile,
     680             :     const std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>>
     681             :         &anRequests,
     682             :     std::vector<ZarrByteVectorQuickResize> &aResults)
     683             : {
     684        3940 :     if (anRequests.empty())
     685           0 :         return true;
     686             : 
     687        3940 :     const auto nDTSize = m_oInputArrayMetadata.oElt.nativeSize;
     688             : 
     689             :     // --- Compute inner chunk count and per-request inner chunk indices ---
     690        3940 :     size_t nInnerChunkCount = 1;
     691       11820 :     for (size_t i = 0; i < m_oInputArrayMetadata.anBlockSizes.size(); ++i)
     692             :     {
     693        7880 :         nInnerChunkCount *=
     694        7880 :             m_oInputArrayMetadata.anBlockSizes[i] / m_anInnerBlockSize[i];
     695             :     }
     696             : 
     697             :     // Determine whether index codec requires byte-swapping
     698             :     const bool bSwapIndex =
     699        7877 :         !m_poIndexCodecSequence->GetCodecs().empty() &&
     700        3937 :         m_poIndexCodecSequence->GetCodecs().front()->GetName() ==
     701        7877 :             ZarrV3CodecBytes::NAME &&
     702           1 :         !m_poIndexCodecSequence->GetCodecs().front()->IsNoOp();
     703             : 
     704             :     // Compute index base offset. For index-at-end, we need the file size.
     705        3940 :     vsi_l_offset nIndexBaseOffset = 0;
     706        3940 :     if (m_bIndexLocationAtEnd)
     707             :     {
     708        3940 :         poFile->Seek(0, SEEK_END);
     709        3940 :         const auto nFileSize = poFile->Tell();
     710        3940 :         vsi_l_offset nIndexSize =
     711             :             static_cast<vsi_l_offset>(nInnerChunkCount) * sizeof(Location);
     712        3940 :         if (m_bIndexHasCRC32)
     713        3937 :             nIndexSize += sizeof(uint32_t);
     714        3940 :         if (nFileSize < nIndexSize)
     715             :         {
     716           0 :             CPLError(CE_Failure, CPLE_AppDefined,
     717             :                      "BatchDecodePartial: shard file too small");
     718           0 :             return false;
     719             :         }
     720        3940 :         nIndexBaseOffset = nFileSize - nIndexSize;
     721             :     }
     722             : 
     723             :     // Build per-request inner chunk indices
     724        7880 :     std::vector<size_t> anInnerChunkIndices(anRequests.size());
     725       15758 :     for (size_t iReq = 0; iReq < anRequests.size(); ++iReq)
     726             :     {
     727       11818 :         const auto &anStartIdx = anRequests[iReq].first;
     728       11818 :         CPLAssert(anStartIdx.size() ==
     729             :                   m_oInputArrayMetadata.anBlockSizes.size());
     730             : 
     731       11818 :         size_t nInnerChunkIdx = 0;
     732       11818 :         size_t nInnerChunkCountPrevDim = 1;
     733       35454 :         for (size_t i = 0; i < anStartIdx.size(); ++i)
     734             :         {
     735       23636 :             nInnerChunkIdx *= nInnerChunkCountPrevDim;
     736       23636 :             nInnerChunkIdx += anStartIdx[i] / m_anInnerBlockSize[i];
     737       23636 :             nInnerChunkCountPrevDim =
     738       23636 :                 m_oInputArrayMetadata.anBlockSizes[i] / m_anInnerBlockSize[i];
     739             :         }
     740       11818 :         anInnerChunkIndices[iReq] = nInnerChunkIdx;
     741             :     }
     742             : 
     743             :     // --- Pass 1: ReadMultiRange for index entries (16 bytes each) ---
     744        7880 :     std::vector<vsi_l_offset> anIdxOffsets(anRequests.size());
     745        7880 :     std::vector<size_t> anIdxSizes(anRequests.size(), sizeof(Location));
     746        7880 :     std::vector<Location> aLocations(anRequests.size());
     747        7880 :     std::vector<void *> ppIdxData(anRequests.size());
     748             : 
     749       15758 :     for (size_t i = 0; i < anRequests.size(); ++i)
     750             :     {
     751       23636 :         anIdxOffsets[i] = nIndexBaseOffset +
     752       11818 :                           static_cast<vsi_l_offset>(anInnerChunkIndices[i]) *
     753             :                               sizeof(Location);
     754       11818 :         ppIdxData[i] = &aLocations[i];
     755             :     }
     756             : 
     757        3940 :     if (poFile->ReadMultiRange(static_cast<int>(anRequests.size()),
     758        3940 :                                ppIdxData.data(), anIdxOffsets.data(),
     759        7880 :                                anIdxSizes.data()) != 0)
     760             :     {
     761           0 :         CPLError(CE_Failure, CPLE_AppDefined,
     762             :                  "BatchDecodePartial: ReadMultiRange() failed for index");
     763           0 :         return false;
     764             :     }
     765             : 
     766             :     // Byte-swap if needed
     767        3940 :     if (bSwapIndex)
     768             :     {
     769           3 :         for (auto &loc : aLocations)
     770             :         {
     771           2 :             CPL_SWAP64PTR(&(loc.nOffset));
     772           2 :             CPL_SWAP64PTR(&(loc.nSize));
     773             :         }
     774             :     }
     775             : 
     776             :     // --- Classify requests: empty chunks vs data chunks ---
     777        3940 :     aResults.resize(anRequests.size());
     778             : 
     779             :     struct DataRange
     780             :     {
     781             :         size_t nReqIdx;
     782             :     };
     783             : 
     784        7880 :     std::vector<DataRange> aDataRanges;
     785        7880 :     std::vector<vsi_l_offset> anDataOffsets;
     786        7880 :     std::vector<size_t> anDataSizes;
     787             : 
     788       15758 :     for (size_t iReq = 0; iReq < anRequests.size(); ++iReq)
     789             :     {
     790       11818 :         const auto &anCount = anRequests[iReq].second;
     791             :         const auto nExpectedDecodedChunkSize =
     792       11818 :             nDTSize * MultiplyElements(anCount);
     793       11818 :         const Location &loc = aLocations[iReq];
     794             : 
     795       11818 :         if (loc.nOffset == std::numeric_limits<uint64_t>::max() &&
     796           0 :             loc.nSize == std::numeric_limits<uint64_t>::max())
     797             :         {
     798             :             // Empty chunk — fill with nodata
     799             :             try
     800             :             {
     801           0 :                 aResults[iReq].resize(nExpectedDecodedChunkSize);
     802             :             }
     803           0 :             catch (const std::exception &)
     804             :             {
     805           0 :                 CPLError(CE_Failure, CPLE_OutOfMemory,
     806             :                          "Cannot allocate memory for decoded chunk");
     807           0 :                 return false;
     808             :             }
     809           0 :             FillWithNoData(aResults[iReq], MultiplyElements(anCount),
     810           0 :                            m_oInputArrayMetadata);
     811           0 :             continue;
     812             :         }
     813             : 
     814             :         if constexpr (sizeof(size_t) < sizeof(uint64_t))
     815             :         {
     816             :             if (loc.nSize > std::numeric_limits<size_t>::max())
     817             :             {
     818             :                 CPLError(CE_Failure, CPLE_NotSupported,
     819             :                          "BatchDecodePartial: too large chunk size");
     820             :                 return false;
     821             :             }
     822             :         }
     823             : 
     824       11818 :         aDataRanges.push_back({iReq});
     825       11818 :         anDataOffsets.push_back(loc.nOffset);
     826       11818 :         anDataSizes.push_back(static_cast<size_t>(loc.nSize));
     827             :     }
     828             : 
     829        3940 :     if (aDataRanges.empty())
     830           0 :         return true;
     831             : 
     832             :     // Validate against file size (same threshold as DecodePartial)
     833        3940 :     constexpr size_t THRESHOLD = 10 * 1024 * 1024;
     834             :     {
     835        3940 :         size_t nMaxSize = 0;
     836       15758 :         for (const auto &s : anDataSizes)
     837       11818 :             nMaxSize = std::max(nMaxSize, s);
     838        3940 :         if (nMaxSize > THRESHOLD)
     839             :         {
     840          32 :             poFile->Seek(0, SEEK_END);
     841          32 :             const auto nFileSize = poFile->Tell();
     842          80 :             for (size_t i = 0; i < aDataRanges.size(); ++i)
     843             :             {
     844         160 :                 if (anDataOffsets[i] >= nFileSize ||
     845          80 :                     anDataSizes[i] > nFileSize - anDataOffsets[i])
     846             :                 {
     847          32 :                     CPLError(CE_Failure, CPLE_NotSupported,
     848             :                              "BatchDecodePartial: invalid chunk location: "
     849             :                              "offset=%" PRIu64 ", size=%" PRIu64,
     850          32 :                              static_cast<uint64_t>(anDataOffsets[i]),
     851          32 :                              static_cast<uint64_t>(anDataSizes[i]));
     852          32 :                     return false;
     853             :                 }
     854             :             }
     855             :         }
     856             :     }
     857             : 
     858             :     // --- Pass 2: ReadMultiRange for data chunks ---
     859        7816 :     std::vector<ZarrByteVectorQuickResize> aCompressed(aDataRanges.size());
     860        7816 :     std::vector<void *> ppData(aDataRanges.size());
     861             : 
     862       15598 :     for (size_t i = 0; i < aDataRanges.size(); ++i)
     863             :     {
     864             :         try
     865             :         {
     866       11690 :             aCompressed[i].resize(anDataSizes[i]);
     867             :         }
     868           0 :         catch (const std::exception &)
     869             :         {
     870           0 :             CPLError(CE_Failure, CPLE_OutOfMemory,
     871             :                      "Cannot allocate memory for compressed chunk");
     872           0 :             return false;
     873             :         }
     874       11690 :         ppData[i] = aCompressed[i].data();
     875             :     }
     876             : 
     877        3908 :     CPLDebugOnly("ZARR",
     878             :                  "BatchDecodePartial: ReadMultiRange() with %d data ranges",
     879             :                  static_cast<int>(aDataRanges.size()));
     880             : 
     881        3908 :     if (poFile->ReadMultiRange(static_cast<int>(aDataRanges.size()),
     882        3908 :                                ppData.data(), anDataOffsets.data(),
     883        7816 :                                anDataSizes.data()) != 0)
     884             :     {
     885          41 :         CPLError(CE_Failure, CPLE_AppDefined,
     886             :                  "BatchDecodePartial: ReadMultiRange() failed for data");
     887          41 :         return false;
     888             :     }
     889             : 
     890             :     // --- Decompress each chunk ---
     891       14441 :     for (size_t i = 0; i < aDataRanges.size(); ++i)
     892             :     {
     893       10937 :         const size_t iReq = aDataRanges[i].nReqIdx;
     894       10937 :         const auto &anCount = anRequests[iReq].second;
     895             :         const auto nExpectedDecodedChunkSize =
     896       10937 :             nDTSize * MultiplyElements(anCount);
     897             : 
     898       10937 :         if (!m_poCodecSequence->Decode(aCompressed[i]))
     899             :         {
     900         363 :             CPLError(CE_Failure, CPLE_NotSupported,
     901             :                      "BatchDecodePartial: cannot decode chunk %" PRIu64,
     902         363 :                      static_cast<uint64_t>(anInnerChunkIndices[iReq]));
     903         363 :             return false;
     904             :         }
     905             : 
     906       10574 :         if (aCompressed[i].size() != nExpectedDecodedChunkSize)
     907             :         {
     908           0 :             CPLError(CE_Failure, CPLE_NotSupported,
     909             :                      "BatchDecodePartial: decoded size %" PRIu64
     910             :                      " != expected %" PRIu64,
     911           0 :                      static_cast<uint64_t>(aCompressed[i].size()),
     912             :                      static_cast<uint64_t>(nExpectedDecodedChunkSize));
     913           0 :             return false;
     914             :         }
     915             : 
     916       10574 :         aResults[iReq] = std::move(aCompressed[i]);
     917             :     }
     918             : 
     919        3504 :     return true;
     920             : }
     921             : 
     922             : /************************************************************************/
     923             : /*         ZarrV3CodecShardingIndexed::GetInnerMostBlockSize()          */
     924             : /************************************************************************/
     925             : 
     926         499 : std::vector<size_t> ZarrV3CodecShardingIndexed::GetInnerMostBlockSize(
     927             :     const std::vector<size_t> &) const
     928             : {
     929         499 :     return m_anInnerBlockSize;
     930             :     // TODO if we one day properly support nested sharding
     931             :     // return m_poCodecSequence->GetInnerMostBlockSize(m_anInnerBlockSize);
     932             : }

Generated by: LCOV version 1.14