LCOV - code coverage report
Current view: top level - ogr/ogrsf_frmts/parquet - ogrparquetwriterlayer.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 513 591 86.8 %
Date: 2026-06-19 21:24:00 Functions: 26 26 100.0 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  Parquet Translator
       4             :  * Purpose:  Implements OGRParquetDriver.
       5             :  * Author:   Even Rouault, <even.rouault at spatialys.com>
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 2022, Planet Labs
       9             :  *
      10             :  * SPDX-License-Identifier: MIT
      11             :  ****************************************************************************/
      12             : 
      13             : #ifdef STANDALONE
      14             : #include "gdal_version.h"
      15             : #else
      16             : #undef DO_NOT_DEFINE_GDAL_DATE_NAME
      17             : #include "gdal_version_full/gdal_version.h"
      18             : #endif
      19             : 
      20             : #include "ogr_parquet.h"
      21             : 
      22             : #include "../arrow_common/ograrrowwriterlayer.hpp"
      23             : 
      24             : #include "ogr_wkb.h"
      25             : 
      26             : #include <cassert>
      27             : #include <utility>
      28             : 
      29             : /************************************************************************/
      30             : /*                       OGRParquetWriterLayer()                        */
      31             : /************************************************************************/
      32             : 
      33         364 : OGRParquetWriterLayer::OGRParquetWriterLayer(
      34             :     OGRParquetWriterDataset *poDataset, arrow::MemoryPool *poMemoryPool,
      35             :     const std::shared_ptr<arrow::io::OutputStream> &poOutputStream,
      36         364 :     const char *pszLayerName)
      37             :     : OGRArrowWriterLayer(poMemoryPool, poOutputStream, pszLayerName),
      38         364 :       m_poDataset(poDataset)
      39             : {
      40         364 :     m_bWriteFieldArrowExtensionName = CPLTestBool(
      41             :         CPLGetConfigOption("OGR_PARQUET_WRITE_ARROW_EXTENSION_NAME", "NO"));
      42         364 : }
      43             : 
      44             : /************************************************************************/
      45             : /*                               Close()                                */
      46             : /************************************************************************/
      47             : 
      48         361 : bool OGRParquetWriterLayer::Close()
      49             : {
      50         361 :     if (m_poTmpGPKGLayer)
      51             :     {
      52           3 :         if (!CopyTmpGpkgLayerToFinalFile())
      53           0 :             return false;
      54             :     }
      55             : 
      56         361 :     if (m_bInitializationOK)
      57             :     {
      58         361 :         if (!FinalizeWriting())
      59           0 :             return false;
      60             :     }
      61             : 
      62         361 :     return true;
      63             : }
      64             : 
      65             : /************************************************************************/
      66             : /*                    CopyTmpGpkgLayerToFinalFile()                     */
      67             : /************************************************************************/
      68             : 
      69           3 : bool OGRParquetWriterLayer::CopyTmpGpkgLayerToFinalFile()
      70             : {
      71           3 :     if (!m_poTmpGPKGLayer)
      72             :     {
      73           0 :         return true;
      74             :     }
      75             : 
      76           3 :     CPLDebug("PARQUET", "CopyTmpGpkgLayerToFinalFile(): start...");
      77             : 
      78           3 :     VSIUnlink(m_poTmpGPKG->GetDescription());
      79             : 
      80           6 :     OGRFeature oFeat(m_poFeatureDefn);
      81             : 
      82             :     // Interval in terms of features between 2 debug progress report messages
      83           3 :     constexpr int PROGRESS_FC_INTERVAL = 100 * 1000;
      84             : 
      85             :     // First, write features without geometries
      86             :     {
      87           3 :         auto poTmpLayer = std::unique_ptr<OGRLayer>(m_poTmpGPKG->ExecuteSQL(
      88             :             "SELECT serialized_feature FROM tmp WHERE fid NOT IN (SELECT id "
      89             :             "FROM rtree_tmp_geom)",
      90           3 :             nullptr, nullptr));
      91           3 :         if (!poTmpLayer)
      92           0 :             return false;
      93        1005 :         for (const auto &poSrcFeature : poTmpLayer.get())
      94             :         {
      95        1002 :             int nBytesFeature = 0;
      96             :             const GByte *pabyFeatureData =
      97        1002 :                 poSrcFeature->GetFieldAsBinary(0, &nBytesFeature);
      98        1002 :             if (!oFeat.DeserializeFromBinary(pabyFeatureData, nBytesFeature))
      99             :             {
     100           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
     101             :                          "Cannot deserialize feature");
     102           0 :                 return false;
     103             :             }
     104        1002 :             if (OGRArrowWriterLayer::ICreateFeature(&oFeat) != OGRERR_NONE)
     105             :             {
     106           0 :                 return false;
     107             :             }
     108             : 
     109        1002 :             if ((m_nFeatureCount % PROGRESS_FC_INTERVAL) == 0)
     110             :             {
     111           0 :                 CPLDebugProgress(
     112             :                     "PARQUET",
     113             :                     "CopyTmpGpkgLayerToFinalFile(): %.02f%% progress",
     114           0 :                     100.0 * double(m_nFeatureCount) /
     115           0 :                         double(m_nTmpFeatureCount));
     116             :             }
     117             :         }
     118             : 
     119           3 :         if (!FlushFeatures())
     120             :         {
     121           0 :             return false;
     122             :         }
     123             :     }
     124             : 
     125             :     // Now walk through the GPKG RTree for features with geometries
     126             :     // Cf https://github.com/sqlite/sqlite/blob/master/ext/rtree/rtree.c
     127             :     // for the description of the content of the rtree _node table
     128           6 :     std::vector<std::pair<int64_t, int>> aNodeNoDepthPair;
     129           3 :     int nTreeDepth = 0;
     130             :     // Queue the root node
     131             :     aNodeNoDepthPair.emplace_back(
     132           3 :         std::make_pair(/* nodeNo = */ 1, /* depth = */ 0));
     133           3 :     int nCountWrittenFeaturesSinceLastFlush = 0;
     134          50 :     while (!aNodeNoDepthPair.empty())
     135             :     {
     136          47 :         const auto &oLastPair = aNodeNoDepthPair.back();
     137          47 :         const int64_t nNodeNo = oLastPair.first;
     138          47 :         const int nCurDepth = oLastPair.second;
     139             :         //CPLDebug("PARQUET", "Reading nodeNode=%d, curDepth=%d", int(nNodeNo), nCurDepth);
     140          47 :         aNodeNoDepthPair.pop_back();
     141             : 
     142          47 :         auto poRTreeLayer = std::unique_ptr<OGRLayer>(m_poTmpGPKG->ExecuteSQL(
     143             :             CPLSPrintf("SELECT data FROM rtree_tmp_geom_node WHERE nodeno "
     144             :                        "= " CPL_FRMT_GIB,
     145             :                        static_cast<GIntBig>(nNodeNo)),
     146          47 :             nullptr, nullptr));
     147          47 :         if (!poRTreeLayer)
     148             :         {
     149           0 :             CPLError(CE_Failure, CPLE_AppDefined,
     150             :                      "Cannot read node " CPL_FRMT_GIB,
     151             :                      static_cast<GIntBig>(nNodeNo));
     152           0 :             return false;
     153             :         }
     154             :         const auto poRTreeFeature =
     155          47 :             std::unique_ptr<const OGRFeature>(poRTreeLayer->GetNextFeature());
     156          47 :         if (!poRTreeFeature)
     157             :         {
     158           0 :             CPLError(CE_Failure, CPLE_AppDefined,
     159             :                      "Cannot read node " CPL_FRMT_GIB,
     160             :                      static_cast<GIntBig>(nNodeNo));
     161           0 :             return false;
     162             :         }
     163             : 
     164          47 :         int nNodeBytes = 0;
     165             :         const GByte *pabyNodeData =
     166          47 :             poRTreeFeature->GetFieldAsBinary(0, &nNodeBytes);
     167          47 :         constexpr int BLOB_HEADER_SIZE = 4;
     168          47 :         if (nNodeBytes < BLOB_HEADER_SIZE)
     169             :         {
     170           0 :             CPLError(CE_Failure, CPLE_AppDefined,
     171             :                      "Not enough bytes when reading node " CPL_FRMT_GIB,
     172             :                      static_cast<GIntBig>(nNodeNo));
     173           0 :             return false;
     174             :         }
     175          47 :         if (nNodeNo == 1)
     176             :         {
     177             :             // Get the RTree depth from the root node
     178           3 :             nTreeDepth = (pabyNodeData[0] << 8) | pabyNodeData[1];
     179             :             //CPLDebug("PARQUET", "nTreeDepth = %d", nTreeDepth);
     180             :         }
     181             : 
     182          47 :         const int nCellCount = (pabyNodeData[2] << 8) | pabyNodeData[3];
     183          47 :         constexpr int SIZEOF_CELL = 24;  // int64_t + 4 float
     184          47 :         if (nNodeBytes < BLOB_HEADER_SIZE + SIZEOF_CELL * nCellCount)
     185             :         {
     186           0 :             CPLError(CE_Failure, CPLE_AppDefined,
     187             :                      "Not enough bytes when reading node " CPL_FRMT_GIB,
     188             :                      static_cast<GIntBig>(nNodeNo));
     189           0 :             return false;
     190             :         }
     191             : 
     192          47 :         size_t nOffset = BLOB_HEADER_SIZE;
     193          47 :         if (nCurDepth == nTreeDepth)
     194             :         {
     195             :             // Leaf node: it references feature IDs.
     196             : 
     197             :             // If we are about to go above m_nRowGroupSize, flush past
     198             :             // features now, to improve the spatial compacity of the row group.
     199          45 :             if (m_nRowGroupSize > nCellCount &&
     200          45 :                 nCountWrittenFeaturesSinceLastFlush + nCellCount >
     201          45 :                     m_nRowGroupSize)
     202             :             {
     203          14 :                 nCountWrittenFeaturesSinceLastFlush = 0;
     204          14 :                 if (!FlushFeatures())
     205             :                 {
     206           0 :                     return false;
     207             :                 }
     208             :             }
     209             : 
     210             :             // nCellCount shouldn't be over 51 normally, but even 65535
     211             :             // would be fine...
     212          45 :             assert(nCellCount <= 65535);
     213        1247 :             for (int i = 0; i < nCellCount; ++i)
     214             :             {
     215             :                 int64_t nFID;
     216        1202 :                 memcpy(&nFID, pabyNodeData + nOffset, sizeof(int64_t));
     217        1202 :                 CPL_MSBPTR64(&nFID);
     218             : 
     219             :                 const auto poSrcFeature = std::unique_ptr<const OGRFeature>(
     220        1202 :                     m_poTmpGPKGLayer->GetFeature(nFID));
     221        1202 :                 if (!poSrcFeature)
     222             :                 {
     223           0 :                     CPLError(CE_Failure, CPLE_AppDefined,
     224             :                              "Cannot get feature " CPL_FRMT_GIB,
     225             :                              static_cast<GIntBig>(nFID));
     226           0 :                     return false;
     227             :                 }
     228             : 
     229        1202 :                 int nBytesFeature = 0;
     230             :                 const GByte *pabyFeatureData =
     231        1202 :                     poSrcFeature->GetFieldAsBinary(0, &nBytesFeature);
     232        1202 :                 if (!oFeat.DeserializeFromBinary(pabyFeatureData,
     233             :                                                  nBytesFeature))
     234             :                 {
     235           0 :                     CPLError(CE_Failure, CPLE_AppDefined,
     236             :                              "Cannot deserialize feature");
     237           0 :                     return false;
     238             :                 }
     239        1202 :                 if (OGRArrowWriterLayer::ICreateFeature(&oFeat) != OGRERR_NONE)
     240             :                 {
     241           0 :                     return false;
     242             :                 }
     243             : 
     244        1202 :                 nOffset += SIZEOF_CELL;
     245             : 
     246        1202 :                 ++nCountWrittenFeaturesSinceLastFlush;
     247             : 
     248        1202 :                 if ((m_nFeatureCount % PROGRESS_FC_INTERVAL) == 0 ||
     249        1202 :                     m_nFeatureCount == m_nTmpFeatureCount / 2)
     250             :                 {
     251           2 :                     CPLDebugProgress(
     252             :                         "PARQUET",
     253             :                         "CopyTmpGpkgLayerToFinalFile(): %.02f%% progress",
     254           2 :                         100.0 * double(m_nFeatureCount) /
     255           2 :                             double(m_nTmpFeatureCount));
     256             :                 }
     257             :             }
     258             :         }
     259             :         else
     260             :         {
     261             :             // Non-leaf node: it references child nodes.
     262             : 
     263             :             // nCellCount shouldn't be over 51 normally, but even 65535
     264             :             // would be fine...
     265           2 :             assert(nCellCount <= 65535);
     266          46 :             for (int i = 0; i < nCellCount; ++i)
     267             :             {
     268             :                 int64_t nNode;
     269          44 :                 memcpy(&nNode, pabyNodeData + nOffset, sizeof(int64_t));
     270          44 :                 CPL_MSBPTR64(&nNode);
     271             :                 aNodeNoDepthPair.emplace_back(
     272          44 :                     std::make_pair(nNode, nCurDepth + 1));
     273          44 :                 nOffset += SIZEOF_CELL;
     274             :             }
     275             :         }
     276             :     }
     277             : 
     278           3 :     CPLDebug("PARQUET",
     279             :              "CopyTmpGpkgLayerToFinalFile(): 100%%, successfully finished");
     280           3 :     return true;
     281             : }
     282             : 
     283             : /************************************************************************/
     284             : /*                      IsSupportedGeometryType()                       */
     285             : /************************************************************************/
     286             : 
     287         360 : bool OGRParquetWriterLayer::IsSupportedGeometryType(
     288             :     OGRwkbGeometryType eGType) const
     289             : {
     290         360 :     const auto eFlattenType = wkbFlatten(eGType);
     291         360 :     if (!OGR_GT_HasM(eGType) && eFlattenType <= wkbGeometryCollection)
     292             :     {
     293         359 :         return true;
     294             :     }
     295             : 
     296             :     const auto osConfigOptionName =
     297           3 :         "OGR_" + GetDriverUCName() + "_ALLOW_ALL_DIMS";
     298           1 :     if (CPLTestBool(CPLGetConfigOption(osConfigOptionName.c_str(), "NO")))
     299             :     {
     300           0 :         return true;
     301             :     }
     302             : 
     303           1 :     CPLError(CE_Failure, CPLE_NotSupported,
     304             :              "Only 2D and Z geometry types are supported (unless the "
     305             :              "%s configuration option is set to YES)",
     306             :              osConfigOptionName.c_str());
     307           1 :     return false;
     308             : }
     309             : 
     310             : /************************************************************************/
     311             : /*                             SetOptions()                             */
     312             : /************************************************************************/
     313             : 
     314         364 : bool OGRParquetWriterLayer::SetOptions(
     315             :     const OGRGeomFieldDefn *poSrcGeomFieldDefn, CSLConstList papszOptions)
     316             : {
     317         364 :     m_aosCreationOptions = papszOptions;
     318             : 
     319         364 :     const char *pszWriteCoveringBBox = CSLFetchNameValueDef(
     320             :         papszOptions, "WRITE_COVERING_BBOX",
     321             :         CPLGetConfigOption("OGR_PARQUET_WRITE_COVERING_BBOX", nullptr));
     322         364 :     m_bWriteBBoxStruct =
     323         364 :         pszWriteCoveringBBox == nullptr || CPLTestBool(pszWriteCoveringBBox);
     324             : 
     325             :     m_oBBoxStructFieldName =
     326         364 :         CSLFetchNameValueDef(papszOptions, "COVERING_BBOX_NAME", "");
     327             : 
     328         364 :     if (CPLTestBool(CSLFetchNameValueDef(papszOptions, "SORT_BY_BBOX", "NO")))
     329             :     {
     330           8 :         const std::string osTmpGPKG(std::string(m_poDataset->GetDescription()) +
     331           4 :                                     ".tmp.gpkg");
     332           4 :         auto poGPKGDrv = GetGDALDriverManager()->GetDriverByName("GPKG");
     333           4 :         if (!poGPKGDrv)
     334             :         {
     335           1 :             CPLError(
     336             :                 CE_Failure, CPLE_AppDefined,
     337             :                 "Driver GPKG required for SORT_BY_BBOX layer creation option");
     338           1 :             return false;
     339             :         }
     340           3 :         m_poTmpGPKG.reset(poGPKGDrv->Create(osTmpGPKG.c_str(), 0, 0, 0,
     341             :                                             GDT_Unknown, nullptr));
     342           3 :         if (!m_poTmpGPKG)
     343           0 :             return false;
     344           3 :         m_poTmpGPKG->MarkSuppressOnClose();
     345           3 :         m_poTmpGPKGLayer = m_poTmpGPKG->CreateLayer("tmp");
     346          12 :         if (!m_poTmpGPKGLayer ||
     347             :             // Serialized feature
     348           3 :             m_poTmpGPKGLayer->CreateField(
     349           3 :                 std::make_unique<OGRFieldDefn>("serialized_feature", OFTBinary)
     350           3 :                     .get()) != OGRERR_NONE ||
     351             :             // FlushCache is needed to avoid SQLite3 errors on empty layers
     352           9 :             m_poTmpGPKG->FlushCache() != CE_None ||
     353           3 :             m_poTmpGPKGLayer->StartTransaction() != OGRERR_NONE)
     354             :         {
     355           0 :             return false;
     356             :         }
     357             :     }
     358             : 
     359             :     const char *pszGeomEncoding =
     360         363 :         CSLFetchNameValue(papszOptions, "GEOMETRY_ENCODING");
     361         363 :     m_eGeomEncoding = OGRArrowGeomEncoding::WKB;
     362         363 :     if (pszGeomEncoding)
     363             :     {
     364         211 :         if (EQUAL(pszGeomEncoding, "WKB"))
     365           7 :             m_eGeomEncoding = OGRArrowGeomEncoding::WKB;
     366         204 :         else if (EQUAL(pszGeomEncoding, "WKT"))
     367           8 :             m_eGeomEncoding = OGRArrowGeomEncoding::WKT;
     368         196 :         else if (EQUAL(pszGeomEncoding, "GEOARROW_INTERLEAVED"))
     369             :         {
     370          28 :             CPLErrorOnce(
     371             :                 CE_Warning, CPLE_AppDefined,
     372             :                 "Use of GEOMETRY_ENCODING=GEOARROW_INTERLEAVED is not "
     373             :                 "recommended. "
     374             :                 "GeoParquet 1.1 uses GEOMETRY_ENCODING=GEOARROW (struct) "
     375             :                 "instead.");
     376          28 :             m_eGeomEncoding = OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC;
     377             :         }
     378         168 :         else if (EQUAL(pszGeomEncoding, "GEOARROW") ||
     379           0 :                  EQUAL(pszGeomEncoding, "GEOARROW_STRUCT"))
     380         168 :             m_eGeomEncoding = OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC;
     381             :         else
     382             :         {
     383           0 :             CPLError(CE_Failure, CPLE_NotSupported,
     384             :                      "Unsupported GEOMETRY_ENCODING = %s", pszGeomEncoding);
     385           0 :             return false;
     386             :         }
     387             :     }
     388             : 
     389             :     const char *pszCoordPrecision =
     390         363 :         CSLFetchNameValue(papszOptions, "COORDINATE_PRECISION");
     391         363 :     if (pszCoordPrecision)
     392           0 :         m_nWKTCoordinatePrecision = atoi(pszCoordPrecision);
     393             : 
     394         363 :     m_bForceCounterClockwiseOrientation =
     395         363 :         EQUAL(CSLFetchNameValueDef(papszOptions, "POLYGON_ORIENTATION",
     396             :                                    "COUNTERCLOCKWISE"),
     397             :               "COUNTERCLOCKWISE");
     398             : 
     399             :     const auto eGType =
     400         363 :         poSrcGeomFieldDefn ? poSrcGeomFieldDefn->GetType() : wkbNone;
     401         363 :     if (poSrcGeomFieldDefn && eGType != wkbNone)
     402             :     {
     403         334 :         if (!IsSupportedGeometryType(eGType))
     404             :         {
     405           1 :             return false;
     406             :         }
     407             : 
     408         333 :         m_poFeatureDefn->SetGeomType(eGType);
     409         333 :         auto eGeomEncoding = m_eGeomEncoding;
     410         333 :         if (eGeomEncoding == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC ||
     411         305 :             eGeomEncoding == OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC)
     412             :         {
     413         196 :             const auto eEncodingType = eGeomEncoding;
     414         196 :             eGeomEncoding = GetPreciseArrowGeomEncoding(eEncodingType, eGType);
     415         196 :             if (eGeomEncoding == eEncodingType)
     416           0 :                 return false;
     417             :         }
     418         333 :         m_aeGeomEncoding.push_back(eGeomEncoding);
     419             : 
     420         666 :         std::string osGeometryName;
     421             :         const char *pszGeometryName =
     422         333 :             CSLFetchNameValue(papszOptions, "GEOMETRY_NAME");
     423         333 :         if (pszGeometryName)
     424          16 :             osGeometryName = pszGeometryName;
     425         317 :         else if (poSrcGeomFieldDefn->GetNameRef()[0])
     426           4 :             osGeometryName = poSrcGeomFieldDefn->GetNameRef();
     427             :         else
     428         313 :             osGeometryName = "geometry";
     429         333 :         m_poFeatureDefn->GetGeomFieldDefn(0)->SetName(osGeometryName.c_str());
     430             : 
     431         333 :         const auto poSpatialRef = poSrcGeomFieldDefn->GetSpatialRef();
     432         333 :         if (poSpatialRef)
     433             :         {
     434          42 :             auto poSRS = poSpatialRef->Clone();
     435          42 :             m_poFeatureDefn->GetGeomFieldDefn(0)->SetSpatialRef(poSRS);
     436          42 :             poSRS->Release();
     437             :         }
     438             :     }
     439             : 
     440         362 :     m_osFIDColumn = CSLFetchNameValueDef(papszOptions, "FID", "");
     441             : 
     442             :     const char *pszCompression =
     443         362 :         CSLFetchNameValue(papszOptions, GDALMD_COMPRESSION);
     444         362 :     if (pszCompression == nullptr)
     445             :     {
     446        1068 :         auto oResult = arrow::util::Codec::GetCompressionType("snappy");
     447         356 :         if (oResult.ok() && arrow::util::Codec::IsAvailable(*oResult))
     448             :         {
     449         356 :             pszCompression = "SNAPPY";
     450             :         }
     451             :         else
     452             :         {
     453           0 :             pszCompression = "NONE";
     454             :         }
     455             :     }
     456             : 
     457         362 :     if (EQUAL(pszCompression, "NONE"))
     458           0 :         pszCompression = "UNCOMPRESSED";
     459             :     auto oResult = arrow::util::Codec::GetCompressionType(
     460         724 :         CPLString(pszCompression).tolower());
     461         362 :     if (!oResult.ok())
     462             :     {
     463           1 :         CPLError(CE_Failure, CPLE_NotSupported,
     464             :                  "Unrecognized compression method: %s", pszCompression);
     465           1 :         return false;
     466             :     }
     467         361 :     m_eCompression = *oResult;
     468         361 :     if (!arrow::util::Codec::IsAvailable(m_eCompression))
     469             :     {
     470           0 :         CPLError(CE_Failure, CPLE_NotSupported,
     471             :                  "Compression method %s is known, but libarrow has not "
     472             :                  "been built with support for it",
     473             :                  pszCompression);
     474           0 :         return false;
     475             :     }
     476         361 :     m_oWriterPropertiesBuilder.compression(m_eCompression);
     477             : 
     478             :     const char *pszCompressionLevel =
     479         361 :         CSLFetchNameValue(papszOptions, "COMPRESSION_LEVEL");
     480         361 :     if (pszCompressionLevel)
     481             :     {
     482           2 :         const int nCompressionLevel = atoi(pszCompressionLevel);
     483           2 :         if (nCompressionLevel != DEFAULT_COMPRESSION_LEVEL)
     484           2 :             m_oWriterPropertiesBuilder.compression_level(nCompressionLevel);
     485             :     }
     486         359 :     else if (EQUAL(pszCompression, "ZSTD"))
     487           1 :         m_oWriterPropertiesBuilder.compression_level(
     488             :             OGR_PARQUET_ZSTD_DEFAULT_COMPRESSION_LEVEL);
     489             : 
     490             :     const std::string osCreator =
     491         361 :         CSLFetchNameValueDef(papszOptions, "CREATOR", "");
     492         361 :     if (!osCreator.empty())
     493           1 :         m_oWriterPropertiesBuilder.created_by(osCreator);
     494             :     else
     495         360 :         m_oWriterPropertiesBuilder.created_by("GDAL " GDAL_RELEASE_NAME
     496             :                                               ", using " CREATED_BY_VERSION);
     497             : 
     498             :     // Undocumented option. Not clear it is useful besides unit test purposes
     499         361 :     if (!CPLTestBool(CSLFetchNameValueDef(papszOptions, "STATISTICS", "YES")))
     500           1 :         m_oWriterPropertiesBuilder.disable_statistics();
     501             : 
     502             : #if PARQUET_VERSION_MAJOR >= 12
     503             :     // Undocumented option. Not clear it is useful to disable it.
     504         361 :     if (CPLTestBool(CSLFetchNameValueDef(papszOptions, "PAGE_INDEX", "YES")))
     505         361 :         m_oWriterPropertiesBuilder.enable_write_page_index();
     506             : #endif
     507             : 
     508             :     const char *pszWriteGeo =
     509         361 :         CPLGetConfigOption("OGR_PARQUET_WRITE_GEO", nullptr);
     510         361 :     m_bWriteGeoMetadata = pszWriteGeo == nullptr || CPLTestBool(pszWriteGeo);
     511             : 
     512         361 :     if (m_eGeomEncoding == OGRArrowGeomEncoding::WKB && eGType != wkbNone)
     513             :     {
     514             : #if ARROW_VERSION_MAJOR >= 21
     515             :         const char *pszUseParquetGeoTypes =
     516             :             CSLFetchNameValueDef(papszOptions, "USE_PARQUET_GEO_TYPES", "NO");
     517             :         if (EQUAL(pszUseParquetGeoTypes, "ONLY"))
     518             :         {
     519             :             m_bUseArrowWKBExtension = true;
     520             :             if (pszWriteGeo == nullptr)
     521             :                 m_bWriteGeoMetadata = false;
     522             :             if (pszWriteCoveringBBox == nullptr)
     523             :                 m_bWriteBBoxStruct = false;
     524             :         }
     525             :         else
     526             :         {
     527             :             m_bUseArrowWKBExtension = CPLTestBool(pszUseParquetGeoTypes);
     528             :         }
     529             : #else
     530         136 :         m_oWriterPropertiesBuilder.disable_statistics(
     531         408 :             parquet::schema::ColumnPath::FromDotString(
     532         136 :                 m_poFeatureDefn->GetGeomFieldDefn(0)->GetNameRef()));
     533             : #endif
     534             :     }
     535             : 
     536             :     const char *pszRowGroupSize =
     537         361 :         CSLFetchNameValue(papszOptions, "ROW_GROUP_SIZE");
     538         361 :     if (pszRowGroupSize)
     539             :     {
     540          19 :         auto nRowGroupSize = static_cast<int64_t>(atoll(pszRowGroupSize));
     541          19 :         if (nRowGroupSize > 0)
     542             :         {
     543          19 :             if (nRowGroupSize > INT_MAX)
     544           0 :                 nRowGroupSize = INT_MAX;
     545          19 :             m_nRowGroupSize = nRowGroupSize;
     546             :         }
     547             :     }
     548             : 
     549         361 :     m_bEdgesSpherical = EQUAL(
     550             :         CSLFetchNameValueDef(papszOptions, "EDGES", "PLANAR"), "SPHERICAL");
     551             : 
     552         361 :     m_bInitializationOK = true;
     553         361 :     return true;
     554             : }
     555             : 
     556             : /************************************************************************/
     557             : /*                          CloseFileWriter()                           */
     558             : /************************************************************************/
     559             : 
     560         361 : bool OGRParquetWriterLayer::CloseFileWriter()  // Closes m_poFileWriter; returns true when Close() reports an OK status.
     561             : {
     562         722 :     auto status = m_poFileWriter->Close();  // NOTE(review): dereferenced unconditionally; presumably callers guarantee a non-null writer - confirm.
     563         361 :     if (!status.ok())
     564             :     {
     565           0 :         CPLError(CE_Failure, CPLE_AppDefined,  // Failure is reported through CPLError; the status is still returned below.
     566             :                  "FileWriter::Close() failed with %s",
     567           0 :                  status.message().c_str());
     568             :     }
     569         722 :     return status.ok();
     570             : }
     571             : 
     572             : /************************************************************************/
     573             : /*                           GetGeoMetadata()                           */
     574             : /************************************************************************/
     575             : 
     576         361 : std::string OGRParquetWriterLayer::GetGeoMetadata() const  // Returns the GeoParquet metadata JSON document as a string, or an empty string when none is produced.
     577             : {
     578             :     // Just for unit testing purposes: when set, the value of this config option is returned verbatim
     579             :     const char *pszGeoMetadata =
     580         361 :         CPLGetConfigOption("OGR_PARQUET_GEO_METADATA", nullptr);
     581         361 :     if (pszGeoMetadata)
     582          16 :         return pszGeoMetadata;
     583             : 
     584         345 :     if (m_poFeatureDefn->GetGeomFieldCount() != 0 && m_bWriteGeoMetadata)  // Requires at least one geometry field and metadata writing enabled.
     585             :     {
     586         650 :         CPLJSONObject oRoot;
     587         325 :         oRoot.Add("version", "1.1.0");
     588         325 :         oRoot.Add("primary_column",
     589         325 :                   m_poFeatureDefn->GetGeomFieldDefn(0)->GetNameRef());  // The first geometry field is the primary column.
     590         650 :         CPLJSONObject oColumns;
     591         325 :         oRoot.Add("columns", oColumns);
     592         667 :         for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i)  // One "columns" entry per geometry field.
     593             :         {
     594         342 :             const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
     595         684 :             CPLJSONObject oColumn;
     596         342 :             oColumns.Add(poGeomFieldDefn->GetNameRef(), oColumn);
     597         342 :             oColumn.Add("encoding",
     598         342 :                         GetGeomEncodingAsString(m_aeGeomEncoding[i], true));
     599             : 
     600         342 :             if (CPLTestBool(CPLGetConfigOption("OGR_PARQUET_WRITE_CRS", "YES")))  // The "crs" member is skipped entirely when this option is false.
     601             :             {
     602         341 :                 const auto poSRS = poGeomFieldDefn->GetSpatialRef();
     603         341 :                 if (poSRS)
     604             :                 {
     605          82 :                     OGRSpatialReference oSRSIdentified(IdentifyCRS(poSRS));
     606             : 
     607          41 :                     const char *pszAuthName = oSRSIdentified.GetAuthorityName();
     608          41 :                     const char *pszAuthCode = oSRSIdentified.GetAuthorityCode();
     609             : 
     610          41 :                     bool bOmitCRS = false;
     611          41 :                     if (pszAuthName != nullptr && pszAuthCode != nullptr &&
     612          40 :                         ((EQUAL(pszAuthName, "EPSG") &&
     613          37 :                           EQUAL(pszAuthCode, "4326")) ||
     614          22 :                          (EQUAL(pszAuthName, "OGC") &&
     615           3 :                           EQUAL(pszAuthCode, "CRS84"))))
     616             :                     {
     617             :                         // To make things less confusing for non-geo-aware
     618             :                         // consumers, omit EPSG:4326 / OGC:CRS84 CRS by default
     619          21 :                         bOmitCRS = CPLTestBool(CPLGetConfigOption(
     620             :                             "OGR_PARQUET_CRS_OMIT_IF_WGS84", "YES"));
     621             :                     }
     622             : 
     623          41 :                     if (bOmitCRS)
     624             :                     {
     625             :                         // do nothing: no "crs" member is written for this column
     626             :                     }
     627          20 :                     else if (EQUAL(CPLGetConfigOption(
     628             :                                        "OGR_PARQUET_CRS_ENCODING", "PROJJSON"),
     629             :                                    "PROJJSON"))
     630             :                     {
     631             :                         // CRS encoded as PROJJSON for GeoParquet >= 0.4.0
     632          20 :                         char *pszPROJJSON = nullptr;
     633          20 :                         oSRSIdentified.exportToPROJJSON(&pszPROJJSON, nullptr);
     634          40 :                         CPLJSONDocument oCRSDoc;
     635          20 :                         CPL_IGNORE_RET_VAL(oCRSDoc.LoadMemory(pszPROJJSON));  // Parse result is deliberately ignored; the document root is used as-is.
     636          20 :                         CPLFree(pszPROJJSON);
     637          20 :                         CPLJSONObject oCRSRoot = oCRSDoc.GetRoot();
     638          20 :                         RemoveIDFromMemberOfEnsembles(oCRSRoot);
     639          20 :                         oColumn.Add("crs", oCRSRoot);
     640             :                     }
     641             :                     else
     642             :                     {
     643             :                         // WKT was used in GeoParquet <= 0.3.0
     644           0 :                         const char *const apszOptions[] = {
     645             :                             "FORMAT=WKT2_2019", "MULTILINE=NO", nullptr};
     646           0 :                         char *pszWKT = nullptr;
     647           0 :                         oSRSIdentified.exportToWkt(&pszWKT, apszOptions);
     648           0 :                         if (pszWKT)
     649           0 :                             oColumn.Add("crs", pszWKT);
     650           0 :                         CPLFree(pszWKT);
     651             :                     }
     652             : 
     653          41 :                     const double dfCoordEpoch = poSRS->GetCoordinateEpoch();  // Epoch is read from the field's SRS, not from the identified copy.
     654          41 :                     if (dfCoordEpoch > 0)
     655           2 :                         oColumn.Add("epoch", dfCoordEpoch);
     656             :                 }
     657             :                 else
     658             :                 {
     659         300 :                     oColumn.AddNull("crs");  // No SRS on the field: an explicit JSON null is written.
     660             :                 }
     661             :             }
     662             : 
     663         342 :             if (m_bEdgesSpherical)
     664             :             {
     665           3 :                 oColumn.Add("edges", "spherical");
     666             :             }
     667             : 
     668         643 :             if (m_aoEnvelopes[i].IsInit() &&
     669         301 :                 CPLTestBool(
     670             :                     CPLGetConfigOption("OGR_PARQUET_WRITE_BBOX", "YES")))
     671             :             {
     672         301 :                 bool bHasZ = false;
     673         537 :                 for (const auto eGeomType : m_oSetWrittenGeometryTypes[i])  // Z bounds are included as soon as one written geometry type has Z.
     674             :                 {
     675         343 :                     bHasZ = CPL_TO_BOOL(OGR_GT_HasZ(eGeomType));
     676         343 :                     if (bHasZ)
     677         107 :                         break;
     678             :                 }
     679         301 :                 CPLJSONArray oBBOX;  // Order: MinX, MinY, [MinZ], MaxX, MaxY, [MaxZ].
     680         301 :                 oBBOX.Add(m_aoEnvelopes[i].MinX);
     681         301 :                 oBBOX.Add(m_aoEnvelopes[i].MinY);
     682         301 :                 if (bHasZ)
     683         107 :                     oBBOX.Add(m_aoEnvelopes[i].MinZ);
     684         301 :                 oBBOX.Add(m_aoEnvelopes[i].MaxX);
     685         301 :                 oBBOX.Add(m_aoEnvelopes[i].MaxY);
     686         301 :                 if (bHasZ)
     687         107 :                     oBBOX.Add(m_aoEnvelopes[i].MaxZ);
     688         301 :                 oColumn.Add("bbox", oBBOX);
     689             :             }
     690             : 
     691             :             // Bounding box column definition: "covering" maps xmin/ymin/xmax/ymax to [bbox field name, component]
     692         614 :             if (m_bWriteBBoxStruct &&
     693         272 :                 CPLTestBool(CPLGetConfigOption(
     694             :                     "OGR_PARQUET_WRITE_COVERING_BBOX_IN_METADATA", "YES")))
     695             :             {
     696         544 :                 CPLJSONObject oCovering;
     697         272 :                 oColumn.Add("covering", oCovering);
     698         544 :                 CPLJSONObject oBBOX;
     699         272 :                 oCovering.Add("bbox", oBBOX);
     700             :                 const auto AddComponent =
     701        3264 :                     [this, i, &oBBOX](const char *pszComponent)
     702             :                 {
     703        1088 :                     CPLJSONArray oArray;
     704        1088 :                     oArray.Add(m_apoFieldsBBOX[i]->name());
     705        1088 :                     oArray.Add(pszComponent);
     706        1088 :                     oBBOX.Add(pszComponent, oArray);
     707        1088 :                 };
     708         272 :                 AddComponent("xmin");
     709         272 :                 AddComponent("ymin");
     710         272 :                 AddComponent("xmax");
     711         272 :                 AddComponent("ymax");
     712             :             }
     713             : 
     714         359 :             const auto GetStringGeometryType = [](OGRwkbGeometryType eType)  // Maps an OGR geometry type to its name plus an optional " Z"/" M"/" ZM" suffix.
     715             :             {
     716         359 :                 const auto eFlattenType = wkbFlatten(eType);
     717         359 :                 std::string osType = "Unknown";  // Kept for any flattened type not listed below.
     718         359 :                 if (wkbPoint == eFlattenType)
     719          85 :                     osType = "Point";
     720         274 :                 else if (wkbLineString == eFlattenType)
     721          42 :                     osType = "LineString";
     722         232 :                 else if (wkbPolygon == eFlattenType)
     723          70 :                     osType = "Polygon";
     724         162 :                 else if (wkbMultiPoint == eFlattenType)
     725          34 :                     osType = "MultiPoint";
     726         128 :                 else if (wkbMultiLineString == eFlattenType)
     727          37 :                     osType = "MultiLineString";
     728          91 :                 else if (wkbMultiPolygon == eFlattenType)
     729          86 :                     osType = "MultiPolygon";
     730           5 :                 else if (wkbGeometryCollection == eFlattenType)
     731           5 :                     osType = "GeometryCollection";
     732         359 :                 if (osType != "Unknown")
     733             :                 {
     734             :                     // M and ZM not supported officially currently, but it
     735             :                     // doesn't hurt to anticipate
     736         359 :                     if (OGR_GT_HasZ(eType) && OGR_GT_HasM(eType))
     737           8 :                         osType += " ZM";
     738         351 :                     else if (OGR_GT_HasZ(eType))
     739         115 :                         osType += " Z";
     740         236 :                     else if (OGR_GT_HasM(eType))
     741           8 :                         osType += " M";
     742             :                 }
     743         359 :                 return osType;
     744             :             };
     745             : 
     746         342 :             if (m_bForceCounterClockwiseOrientation)
     747         341 :                 oColumn.Add("orientation", "counterclockwise");
     748             : 
     749         342 :             CPLJSONArray oArray;
     750         701 :             for (const auto eType : m_oSetWrittenGeometryTypes[i])  // One entry per geometry type recorded for this field.
     751             :             {
     752         359 :                 oArray.Add(GetStringGeometryType(eType));
     753             :             }
     754         342 :             oColumn.Add("geometry_types", oArray);
     755             :         }
     756             : 
     757         325 :         return oRoot.Format(CPLJSONObject::PrettyFormat::Plain);
     758             :     }
     759          20 :     return std::string();  // No geometry field, or geo metadata writing disabled.
     760             : }
     761             : 
     762             : /************************************************************************/
     763             : /*                 PerformStepsBeforeFinalFlushGroup()                  */
     764             : /************************************************************************/
     765             : 
     766         361 : void OGRParquetWriterLayer::PerformStepsBeforeFinalFlushGroup()  // Appends the "geo", "ARROW:schema", "gdal:metadata" and "gdal:creation-options" entries to m_poKeyValueMetadata.
     767             : {
     768         361 :     if (m_poKeyValueMetadata)  // Nothing is done when there is no key/value metadata object.
     769             :     {
     770         722 :         std::string osGeoMetadata = GetGeoMetadata();
     771         722 :         auto poTmpSchema = m_poSchema;  // Stays equal to m_poSchema unless geo metadata is attached below.
     772         361 :         if (!osGeoMetadata.empty())
     773             :         {
     774             :             // HACK: it would be good for Arrow to provide a clean way to alter
     775             :             // key value metadata before finalizing.
     776             :             // We need to write metadata at end to write the bounding box.
     777         341 :             const_cast<arrow::KeyValueMetadata *>(m_poKeyValueMetadata.get())
     778         341 :                 ->Append("geo", osGeoMetadata);
     779             : 
     780         341 :             auto kvMetadata = poTmpSchema->metadata()
     781          15 :                                   ? poTmpSchema->metadata()->Copy()
     782         356 :                                   : std::make_shared<arrow::KeyValueMetadata>();
     783         341 :             kvMetadata->Append("geo", std::move(osGeoMetadata));
     784         341 :             poTmpSchema = poTmpSchema->WithMetadata(kvMetadata);  // The serialized schema below then also carries the "geo" entry.
     785             :         }
     786             : 
     787         361 :         if (CPLTestBool(
     788             :                 CPLGetConfigOption("OGR_PARQUET_WRITE_ARROW_SCHEMA", "YES")))
     789             :         {
     790             :             auto status =
     791         722 :                 ::arrow::ipc::SerializeSchema(*poTmpSchema, m_poMemoryPool);
     792         361 :             if (status.ok())  // A serialization failure is silently skipped: no "ARROW:schema" entry is written.
     793             :             {
     794             :                 // The serialized schema is not UTF-8, which is required for
     795             :                 // Thrift
     796         722 :                 const std::string schema_as_string = (*status)->ToString();
     797             :                 std::string schema_base64 =
     798         361 :                     ::arrow::util::base64_encode(schema_as_string);
     799         361 :                 static const std::string kArrowSchemaKey = "ARROW:schema";
     800             :                 const_cast<arrow::KeyValueMetadata *>(
     801         361 :                     m_poKeyValueMetadata.get())
     802         361 :                     ->Append(kArrowSchemaKey, std::move(schema_base64));
     803             :             }
     804             :         }
     805             : 
     806             :         // Put GDAL metadata into a gdal:metadata domain
     807         722 :         CPLJSONObject oMultiMetadata;
     808         361 :         bool bHasMultiMetadata = false;
     809         369 :         auto &l_oMDMD = oMDMD.GetDomainList() && *(oMDMD.GetDomainList())  // Use oMDMD when it lists at least one domain, else the dataset's metadata.
     810         369 :                             ? oMDMD
     811         353 :                             : m_poDataset->GetMultiDomainMetadata();
     812         371 :         for (CSLConstList papszDomainIter = l_oMDMD.GetDomainList();
     813         371 :              papszDomainIter && *papszDomainIter; ++papszDomainIter)
     814             :         {
     815          10 :             const char *pszDomain = *papszDomainIter;
     816          10 :             CSLConstList papszMD = l_oMDMD.GetMetadata(pszDomain);
     817          10 :             if (STARTS_WITH(pszDomain, "json:") && papszMD && papszMD[0])  // json: domains are embedded as parsed JSON.
     818             :             {
     819           1 :                 CPLJSONDocument oDoc;
     820           1 :                 if (oDoc.LoadMemory(papszMD[0]))
     821             :                 {
     822           1 :                     bHasMultiMetadata = true;
     823           1 :                     oMultiMetadata.Add(pszDomain, oDoc.GetRoot());
     824           1 :                     continue;
     825           0 :                 }  // On parse failure, control falls through to the key/value handling below.
     826             :             }
     827           9 :             else if (STARTS_WITH(pszDomain, "xml:") && papszMD && papszMD[0])  // xml: domains are stored as a single string.
     828             :             {
     829           1 :                 bHasMultiMetadata = true;
     830           1 :                 oMultiMetadata.Add(pszDomain, papszMD[0]);
     831           1 :                 continue;
     832             :             }
     833          16 :             CPLJSONObject oMetadata;
     834           8 :             bool bHasMetadata = false;
     835          16 :             for (CSLConstList papszMDIter = papszMD;
     836          16 :                  papszMDIter && *papszMDIter; ++papszMDIter)
     837             :             {
     838           8 :                 char *pszKey = nullptr;
     839           8 :                 const char *pszValue = CPLParseNameValue(*papszMDIter, &pszKey);
     840           8 :                 if (pszKey && pszValue)  // Entries that do not parse as KEY=VALUE are dropped.
     841             :                 {
     842           8 :                     bHasMetadata = true;
     843           8 :                     bHasMultiMetadata = true;
     844           8 :                     oMetadata.Add(pszKey, pszValue);
     845             :                 }
     846           8 :                 CPLFree(pszKey);
     847             :             }
     848           8 :             if (bHasMetadata)
     849           8 :                 oMultiMetadata.Add(pszDomain, oMetadata);
     850             :         }
     851         361 :         if (bHasMultiMetadata)
     852             :         {
     853           8 :             const_cast<arrow::KeyValueMetadata *>(m_poKeyValueMetadata.get())
     854           8 :                 ->Append(
     855             :                     "gdal:metadata",
     856          16 :                     oMultiMetadata.Format(CPLJSONObject::PrettyFormat::Plain));
     857             :         }
     858             : 
     859         361 :         if (!m_aosCreationOptions.empty())
     860             :         {
     861         508 :             CPLJSONObject oCreationOptions;
     862         254 :             bool bEmpty = true;
     863        1120 :             for (const auto &[key, value] :
     864        1374 :                  cpl::IterateNameValue(m_aosCreationOptions))
     865             :             {
     866         560 :                 if (!EQUAL(key, "FID") && !EQUAL(key, "GEOMETRY_NAME") &&  // FID, GEOMETRY_NAME and EDGES are not recorded.
     867         521 :                     !EQUAL(key, "EDGES"))
     868             :                 {
     869         517 :                     bEmpty = false;
     870         517 :                     oCreationOptions.Add(key, value);
     871             :                 }
     872             :             }
     873         254 :             if (!bEmpty)
     874             :             {
     875             :                 const_cast<arrow::KeyValueMetadata *>(
     876         240 :                     m_poKeyValueMetadata.get())
     877         240 :                     ->Append("gdal:creation-options",
     878         480 :                              oCreationOptions.Format(
     879             :                                  CPLJSONObject::PrettyFormat::Plain));
     880             :             }
     881             :         }
     882             :     }
     883         361 : }
     884             : 
     885             : /************************************************************************/
     886             : /*                                Open()                                */
     887             : /************************************************************************/
     888             : 
     889             : // Same as parquet::arrow::FileWriter::Open(), except we also
     890             : // return KeyValueMetadata (through outMetadata), the object handed to the ParquetFileWriter
     891             : static arrow::Status
     892         361 : Open(const ::arrow::Schema &schema, ::arrow::MemoryPool *pool,
     893             :      std::shared_ptr<::arrow::io::OutputStream> sink,
     894             :      std::shared_ptr<parquet::WriterProperties> properties,
     895             :      std::shared_ptr<parquet::ArrowWriterProperties> arrow_properties,
     896             :      std::unique_ptr<parquet::arrow::FileWriter> *writer,  // out: the created file writer
     897             :      std::shared_ptr<const arrow::KeyValueMetadata> *outMetadata)  // out: set before the base writer is opened
     898             : {
     899         361 :     std::shared_ptr<parquet::SchemaDescriptor> parquet_schema;
     900         722 :     RETURN_NOT_OK(parquet::arrow::ToParquetSchema(
     901             :         &schema, *properties, *arrow_properties, &parquet_schema));
     902             : 
     903             :     auto schema_node = std::static_pointer_cast<parquet::schema::GroupNode>(
     904         722 :         parquet_schema->schema_root());
     905             : 
     906         361 :     auto metadata = schema.metadata()  // Copy of the schema metadata, or a fresh empty object when the schema has none.
     907          20 :                         ? schema.metadata()->Copy()
     908         742 :                         : std::make_shared<arrow::KeyValueMetadata>();
     909         361 :     *outMetadata = metadata;  // Same object as the one passed to ParquetFileWriter::Open() below.
     910             : 
     911         361 :     std::unique_ptr<parquet::ParquetFileWriter> base_writer;
     912         361 :     PARQUET_CATCH_NOT_OK(base_writer = parquet::ParquetFileWriter::Open(
     913             :                              std::move(sink), std::move(schema_node),
     914             :                              std::move(properties), metadata));
     915             : 
     916         361 :     auto schema_ptr = std::make_shared<::arrow::Schema>(schema);
     917             :     return parquet::arrow::FileWriter::Make(
     918         722 :         pool, std::move(base_writer), std::move(schema_ptr),
     919        1083 :         std::move(arrow_properties), writer);
     920             : }
     921             : 
     922             : /************************************************************************/
     923             : /*                            CreateSchema()                            */
     924             : /************************************************************************/
     925             : 
     926         361 : void OGRParquetWriterLayer::CreateSchema()  // Builds the schema by delegating entirely to CreateSchemaCommon().
     927             : {
     928         361 :     CreateSchemaCommon();
     929         361 : }
     930             : 
     931             : /************************************************************************/
     932             : /*                          CreateGeomField()                           */
     933             : /************************************************************************/
     934             : 
     935          27 : OGRErr OGRParquetWriterLayer::CreateGeomField(const OGRGeomFieldDefn *poField,  // Adds a geometry field through the base class, then disables Parquet statistics for it when needed.
     936             :                                               int bApproxOK)  // Returns the base-class error code unchanged.
     937             : {
     938          27 :     OGRErr eErr = OGRArrowWriterLayer::CreateGeomField(poField, bApproxOK);
     939          53 :     if (eErr == OGRERR_NONE &&
     940          26 :         m_aeGeomEncoding.back() == OGRArrowGeomEncoding::WKB
     941             : #if ARROW_VERSION_MAJOR >= 21  // Older Arrow: statistics are always disabled for a WKB geometry column.
     942             :         // Geostatistics in Arrow 21 do not support geographic type for now, so only spherical edges need statistics disabled
     943          53 :         && m_bEdgesSpherical
     944             : #endif
     945             :     )
     946             :     {
     947           0 :         m_oWriterPropertiesBuilder.disable_statistics(  // Applies to the geometry field that was just appended (last index).
     948           0 :             parquet::schema::ColumnPath::FromDotString(
     949           0 :                 m_poFeatureDefn
     950           0 :                     ->GetGeomFieldDefn(m_poFeatureDefn->GetGeomFieldCount() - 1)
     951             :                     ->GetNameRef()));
     952             :     }
     953          27 :     return eErr;
     954             : }
     955             : 
     956             : /************************************************************************/
     957             : /*                            CreateWriter()                            */
     958             : /************************************************************************/
     959             : 
     960         361 : void OGRParquetWriterLayer::CreateWriter()  // Ensures the schema exists, then opens m_poFileWriter and captures m_poKeyValueMetadata.
     961             : {
     962         361 :     CPLAssert(m_poFileWriter == nullptr);  // Must only be called while no writer exists.
     963             : 
     964         361 :     if (m_poSchema == nullptr)
     965             :     {
     966          43 :         CreateSchema();
     967             :     }
     968             :     else
     969             :     {
     970         318 :         FinalizeSchema();
     971             :     }
     972             : 
     973             :     auto arrowWriterProperties =
     974         361 :         parquet::ArrowWriterProperties::Builder().store_schema()->build();
     975        1083 :     CPL_IGNORE_RET_VAL(Open(*m_poSchema, m_poMemoryPool, m_poOutputStream,  // NOTE(review): the Open() status is discarded; presumably callers check m_poFileWriter afterwards - confirm.
     976         722 :                             m_oWriterPropertiesBuilder.build(),
     977         361 :                             std::move(arrowWriterProperties), &m_poFileWriter,
     978             :                             &m_poKeyValueMetadata));
     979         361 : }
     980             : 
     981             : /************************************************************************/
     982             : /*                           ICreateFeature()                           */
     983             : /************************************************************************/
     984             : 
     985        3390 : OGRErr OGRParquetWriterLayer::ICreateFeature(OGRFeature *poFeature)  // Writes the feature directly, or stages it in the temporary GeoPackage layer when one is in use.
     986             : {
     987             :     // If not using SORT_BY_BBOX=YES layer creation option, we can directly
     988             :     // write features to the final Parquet file
     989        3390 :     if (!m_poTmpGPKGLayer)
     990        1186 :         return OGRArrowWriterLayer::ICreateFeature(poFeature);
     991             : 
     992             :     // SORT_BY_BBOX=YES case: we write for now a serialized version of poFeature
     993             :     // in a temporary GeoPackage file.
     994             : 
     995        2204 :     GIntBig nFID = poFeature->GetFID();
     996        2204 :     if (!m_osFIDColumn.empty() && nFID == OGRNullFID)  // With a FID column and no FID set, assign the running feature count (also set on the caller's feature).
     997             :     {
     998        1102 :         nFID = m_nTmpFeatureCount;
     999        1102 :         poFeature->SetFID(nFID);
    1000             :     }
    1001        2204 :     ++m_nTmpFeatureCount;
    1002             : 
    1003        4408 :     std::vector<GByte> abyBuffer;
    1004             :     // Serialize the source feature as a single array of bytes to preserve it
    1005             :     // fully
    1006        2204 :     if (!poFeature->SerializeToBinary(abyBuffer))
    1007             :     {
    1008           0 :         return OGRERR_FAILURE;
    1009             :     }
    1010             : 
    1011             :     // SQLite3 limitation: a row must fit in slightly less than 1 GB.
    1012        2204 :     constexpr int SOME_MARGIN = 128;
    1013        2204 :     if (abyBuffer.size() > 1024 * 1024 * 1024 - SOME_MARGIN)
    1014             :     {
    1015           0 :         CPLError(CE_Failure, CPLE_NotSupported,
    1016             :                  "Features larger than 1 GB are not supported");
    1017           0 :         return OGRERR_FAILURE;
    1018             :     }
    1019             : 
    1020        4408 :     OGRFeature oFeat(m_poTmpGPKGLayer->GetLayerDefn());
    1021        2204 :     oFeat.SetFID(nFID);
    1022        2204 :     oFeat.SetField(0, static_cast<int>(abyBuffer.size()), abyBuffer.data());  // The size check above keeps this cast within int range.
    1023        2204 :     const auto poSrcGeom = poFeature->GetGeometryRef();
    1024        2204 :     if (poSrcGeom && !poSrcGeom->IsEmpty())  // Features with no geometry or an empty one are stored without a geometry.
    1025             :     {
    1026             :         // For the purpose of building an RTree, just use the bounding box of
    1027             :         // the geometry as the geometry.
    1028        1202 :         OGREnvelope sEnvelope;
    1029        1202 :         poSrcGeom->getEnvelope(&sEnvelope);
    1030        2404 :         auto poPoly = std::make_unique<OGRPolygon>();
    1031        2404 :         auto poLR = std::make_unique<OGRLinearRing>();
    1032        1202 :         poLR->addPoint(sEnvelope.MinX, sEnvelope.MinY);
    1033        1202 :         poLR->addPoint(sEnvelope.MinX, sEnvelope.MaxY);
    1034        1202 :         poLR->addPoint(sEnvelope.MaxX, sEnvelope.MaxY);
    1035        1202 :         poLR->addPoint(sEnvelope.MaxX, sEnvelope.MinY);
    1036        1202 :         poLR->addPoint(sEnvelope.MinX, sEnvelope.MinY);  // Repeat the first corner to close the ring.
    1037        1202 :         poPoly->addRingDirectly(poLR.release());
    1038        1202 :         oFeat.SetGeometryDirectly(poPoly.release());
    1039             :     }
    1040        2204 :     return m_poTmpGPKGLayer->CreateFeature(&oFeat);
    1041             : }
    1042             : 
    1043             : /************************************************************************/
    1044             : /*                             FlushGroup()                             */
    1045             : /************************************************************************/
    1046             : 
                       // Starts a new row group on m_poFileWriter, writes the arrays handed
                       // out by WriteArrays() into it as column chunks, then clears the array
                       // builders.
                       //
                       // Returns false if NewRowGroup() fails, otherwise the value returned by
                       // WriteArrays(). Failures are reported through CPLError().
                       // ClearArrayBuilers() is called on both the early-failure path and the
                       // normal path.
    1047         333 : bool OGRParquetWriterLayer::FlushGroup()
    1048             : {
                           // With Parquet >= 20, NewRowGroup() is called without arguments;
                           // older versions are passed the length of the first array builder.
                           // NOTE(review): m_apoBuilders[0] is accessed unconditionally on the
                           // older code path, so at least one builder is assumed to exist.
    1049             : #if PARQUET_VERSION_MAJOR >= 20
    1050             :     auto status = m_poFileWriter->NewRowGroup();
    1051             : #else
    1052         666 :     auto status = m_poFileWriter->NewRowGroup(m_apoBuilders[0]->length());
    1053             : #endif
    1054         333 :     if (!status.ok())
    1055             :     {
    1056           0 :         CPLError(CE_Failure, CPLE_AppDefined, "NewRowGroup() failed with %s",
    1057           0 :                  status.message().c_str());
    1058           0 :         ClearArrayBuilers();
    1059           0 :         return false;
    1060             :     }
    1061             : 
                           // The callback writes the array it receives as one column chunk and
                           // returns false (after emitting an error naming the field) when the
                           // write fails.
    1062         333 :     auto ret = WriteArrays(
    1063        1291 :         [this](const std::shared_ptr<arrow::Field> &field,
    1064        1291 :                const std::shared_ptr<arrow::Array> &array)
    1065             :         {
    1066        2582 :             auto l_status = m_poFileWriter->WriteColumnChunk(*array);
    1067        1291 :             if (!l_status.ok())
    1068             :             {
    1069           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    1070             :                          "WriteColumnChunk() failed for field %s: %s",
    1071           0 :                          field->name().c_str(), l_status.message().c_str());
    1072           0 :                 return false;
    1073             :             }
    1074        1291 :             return true;
    1075             :         });
    1076             : 
                           // The builders are cleared whether or not WriteArrays() succeeded.
    1077         333 :     ClearArrayBuilers();
    1078         333 :     return ret;
    1079             : }
    1080             : 
    1081             : /************************************************************************/
    1082             : /*                   FixupWKBGeometryBeforeWriting()                    */
    1083             : /************************************************************************/
    1084             : 
                       // Forwards the buffer (pabyWkb, nLen) to
                       // OGRWKBFixupCounterClockWiseExternalRing() when
                       // m_bForceCounterClockwiseOrientation is set; does nothing otherwise.
                       //
                       // pabyWkb: buffer passed as non-const, presumably WKB bytes that are
                       //          rewritten in place by the helper - confirm against ogr_wkb.h.
                       // nLen:    size passed along with pabyWkb.
    1085          51 : void OGRParquetWriterLayer::FixupWKBGeometryBeforeWriting(GByte *pabyWkb,
    1086             :                                                           size_t nLen)
    1087             : {
    1088          51 :     if (!m_bForceCounterClockwiseOrientation)
    1089           0 :         return;
    1090             : 
    1091          51 :     OGRWKBFixupCounterClockWiseExternalRing(pabyWkb, nLen);
    1092             : }
    1093             : 
    1094             : /************************************************************************/
    1095             : /*                     FixupGeometryBeforeWriting()                     */
    1096             : /************************************************************************/
    1097             : 
                       // Reorients polygon rings of poGeom in place when
                       // m_bForceCounterClockwiseOrientation is set; does nothing otherwise.
                       //
                       // - Polygon: the first ring is reversed if it is clockwise, every
                       //   following ring is reversed if it is not clockwise.
                       // - MultiPolygon / GeometryCollection: each member is processed
                       //   recursively.
                       // - Any other flattened geometry type is left untouched.
                       //
                       // poGeom is dereferenced without a null check once the flag test has
                       // passed, so callers must pass a valid geometry.
    1098        1429 : void OGRParquetWriterLayer::FixupGeometryBeforeWriting(OGRGeometry *poGeom)
    1099             : {
    1100        1429 :     if (!m_bForceCounterClockwiseOrientation)
    1101           3 :         return;
    1102             : 
    1103        1426 :     const auto eFlattenType = wkbFlatten(poGeom->getGeometryType());
    1104             :     // Polygon rings MUST follow the right-hand rule for orientation
    1105             :     // (counterclockwise external rings, clockwise internal rings)
    1106        1426 :     if (eFlattenType == wkbPolygon)
    1107             :     {
                               // bFirstRing is true only for the first ring visited, which is
                               // handled as the external ring; the others as internal rings.
    1108          74 :         bool bFirstRing = true;
    1109         151 :         for (auto poRing : poGeom->toPolygon())
    1110             :         {
    1111          85 :             if ((bFirstRing && poRing->isClockwise()) ||
    1112           8 :                 (!bFirstRing && !poRing->isClockwise()))
    1113             :             {
    1114          72 :                 poRing->reversePoints();
    1115             :             }
    1116          77 :             bFirstRing = false;
    1117             :         }
    1118             :     }
    1119        1352 :     else if (eFlattenType == wkbMultiPolygon ||
    1120             :              eFlattenType == wkbGeometryCollection)
    1121             :     {
                               // Recurse so that polygons nested in collections are fixed too.
    1122         135 :         for (auto poSubGeom : poGeom->toGeometryCollection())
    1123             :         {
    1124          71 :             FixupGeometryBeforeWriting(poSubGeom);
    1125             :         }
    1126             :     }
    1127             : }
    1128             : 
    1129             : /************************************************************************/
    1130             : /*                          WriteArrowBatch()                           */
    1131             : /************************************************************************/
    1132             : 
                       // Only compiled when PARQUET_VERSION_MAJOR > 10.
                       //
                       // Writes one Arrow batch. When m_poTmpGPKGLayer is set, the call is
                       // delegated to OGRLayer::WriteArrowBatch(). Otherwise the arguments are
                       // handed to WriteArrowBatchInternal() together with a callback that
                       // starts a new buffered row group and writes the record batch into it.
                       //
                       // Returns the value of whichever of those two calls is made. The
                       // callback reports failures through CPLError() and returns false.
    1133             : #if PARQUET_VERSION_MAJOR > 10
    1134             : inline bool
    1135          25 : OGRParquetWriterLayer::WriteArrowBatch(const struct ArrowSchema *schema,
    1136             :                                        struct ArrowArray *array,
    1137             :                                        CSLConstList papszOptions)
    1138             : {
    1139          25 :     if (m_poTmpGPKGLayer)
    1140             :     {
    1141             :         // When using SORT_BY_BBOX=YES option, we can't directly write the
    1142             :         // input array, because we need to sort features. Hence we fall back
    1143             :         // to the OGRLayer base implementation, which will ultimately call
    1144             :         // OGRParquetWriterLayer::ICreateFeature()
    1145           0 :         return OGRLayer::WriteArrowBatch(schema, array, papszOptions);
    1146             :     }
    1147             : 
    1148          50 :     return WriteArrowBatchInternal(
    1149             :         schema, array, papszOptions,
    1150          50 :         [this](const std::shared_ptr<arrow::RecordBatch> &poBatch)
    1151             :         {
                                   // A new buffered row group is requested for every batch
                                   // passed to this callback.
    1152          50 :             auto status = m_poFileWriter->NewBufferedRowGroup();
    1153          25 :             if (!status.ok())
    1154             :             {
    1155           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    1156             :                          "NewBufferedRowGroup() failed with %s",
    1157           0 :                          status.message().c_str());
    1158           0 :                 return false;
    1159             :             }
    1160             : 
    1161          25 :             status = m_poFileWriter->WriteRecordBatch(*poBatch);
    1162          25 :             if (!status.ok())
    1163             :             {
    1164           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
    1165             :                          "WriteRecordBatch() failed: %s",
    1166           0 :                          status.message().c_str());
    1167           0 :                 return false;
    1168             :             }
    1169             : 
    1170          25 :             return true;
    1171          25 :         });
    1172             : }
    1173             : #endif
    1174             : 
    1175             : /************************************************************************/
    1176             : /*                           TestCapability()                           */
    1177             : /************************************************************************/
    1178             : 
                       // Reports whether the capability named pszCap is available.
                       //
                       // OLCFastWriteArrowBatch is answered with false when built against
                       // Parquet <= 10, and also when m_poTmpGPKGLayer is set. Every other
                       // query is forwarded to OGRArrowWriterLayer::TestCapability().
    1179         694 : inline int OGRParquetWriterLayer::TestCapability(const char *pszCap) const
    1180             : {
    1181             : #if PARQUET_VERSION_MAJOR <= 10
    1182             :     if (EQUAL(pszCap, OLCFastWriteArrowBatch))
    1183             :         return false;
    1184             : #endif
    1185             : 
    1186         694 :     if (m_poTmpGPKGLayer && EQUAL(pszCap, OLCFastWriteArrowBatch))
    1187             :     {
    1188             :         // When using SORT_BY_BBOX=YES option, we can't directly write the
    1189             :         // input array, because we need to sort features. So this is not
    1190             :         // fast
    1191           1 :         return false;
    1192             :     }
    1193             : 
    1194         693 :     return OGRArrowWriterLayer::TestCapability(pszCap);
    1195             : }
    1196             : 
    1197             : /************************************************************************/
    1198             : /*                     CreateFieldFromArrowSchema()                     */
    1199             : /************************************************************************/
    1200             : 
                       // Only compiled when PARQUET_VERSION_MAJOR > 10.
                       //
                       // Chooses which base-class implementation handles the request:
                       // OGRLayer::CreateFieldFromArrowSchema() when m_poTmpGPKGLayer is set,
                       // OGRArrowWriterLayer::CreateFieldFromArrowSchema() otherwise. Both
                       // arguments are forwarded unchanged and the callee's result is
                       // returned as is.
    1201             : #if PARQUET_VERSION_MAJOR > 10
    1202         479 : bool OGRParquetWriterLayer::CreateFieldFromArrowSchema(
    1203             :     const struct ArrowSchema *schema, CSLConstList papszOptions)
    1204             : {
    1205         479 :     if (m_poTmpGPKGLayer)
    1206             :     {
    1207             :         // When using SORT_BY_BBOX=YES option, we can't directly write the
    1208             :         // input array, because we need to sort features. But this process
    1209             :         // only supports the base Arrow types supported by
    1210             :         // OGRLayer::WriteArrowBatch()
    1211           0 :         return OGRLayer::CreateFieldFromArrowSchema(schema, papszOptions);
    1212             :     }
    1213             : 
    1214         479 :     return OGRArrowWriterLayer::CreateFieldFromArrowSchema(schema,
    1215         479 :                                                            papszOptions);
    1216             : }
    1217             : #endif
    1218             : 
    1219             : /************************************************************************/
    1220             : /*                       IsArrowSchemaSupported()                       */
    1221             : /************************************************************************/
    1222             : 
                       // Only compiled when PARQUET_VERSION_MAJOR > 10.
                       //
                       // Tells whether the given Arrow schema, including all of its children,
                       // can be written by this layer.
                       //
                       // When m_poTmpGPKGLayer is set the decision is delegated to
                       // OGRLayer::IsArrowSchemaSupported(). Otherwise the schema is rejected
                       // when its format string designates float16, StringView, BinaryView,
                       // ListView or LargeListView, or when any child schema is rejected.
                       //
                       // osErrorMsg: set to the reason on the rejections decided here.
                       // Returns true when nothing was rejected.
    1223             : #if PARQUET_VERSION_MAJOR > 10
    1224        1095 : bool OGRParquetWriterLayer::IsArrowSchemaSupported(
    1225             :     const struct ArrowSchema *schema, CSLConstList papszOptions,
    1226             :     std::string &osErrorMsg) const
    1227             : {
    1228        1095 :     if (m_poTmpGPKGLayer)
    1229             :     {
    1230             :         // When using SORT_BY_BBOX=YES option, we can't directly write the
    1231             :         // input array, because we need to sort features. But this process
    1232             :         // only supports the base Arrow types supported by
    1233             :         // OGRLayer::WriteArrowBatch()
    1234           0 :         return OGRLayer::IsArrowSchemaSupported(schema, papszOptions,
    1235           0 :                                                 osErrorMsg);
    1236             :     }
    1237             : 
                           // Format "e" (exact match: the second character must be the
                           // terminator).
    1238        1095 :     if (schema->format[0] == 'e' && schema->format[1] == 0)
    1239             :     {
    1240           1 :         osErrorMsg = "float16 not supported";
    1241           1 :         return false;
    1242             :     }
                           // Formats starting with "vu" / "vz"; only the first two characters
                           // are inspected.
    1243        1094 :     if (schema->format[0] == 'v' && schema->format[1] == 'u')
    1244             :     {
    1245           1 :         osErrorMsg = "StringView not supported";
    1246           1 :         return false;
    1247             :     }
    1248        1093 :     if (schema->format[0] == 'v' && schema->format[1] == 'z')
    1249             :     {
    1250           1 :         osErrorMsg = "BinaryView not supported";
    1251           1 :         return false;
    1252             :     }
                           // Formats starting with "+vl" / "+vL". Any other "+v..." format
                           // falls through to the child check below.
    1253        1092 :     if (schema->format[0] == '+' && schema->format[1] == 'v')
    1254             :     {
    1255           0 :         if (schema->format[2] == 'l')
    1256             :         {
    1257           0 :             osErrorMsg = "ListView not supported";
    1258           0 :             return false;
    1259             :         }
    1260           0 :         else if (schema->format[2] == 'L')
    1261             :         {
    1262           0 :             osErrorMsg = "LargeListView not supported";
    1263           0 :             return false;
    1264             :         }
    1265             :     }
                           // A single unsupported child makes the whole schema unsupported; the
                           // recursive call has already filled osErrorMsg in that case.
    1266        2169 :     for (int64_t i = 0; i < schema->n_children; ++i)
    1267             :     {
    1268        1080 :         if (!IsArrowSchemaSupported(schema->children[i], papszOptions,
    1269             :                                     osErrorMsg))
    1270             :         {
    1271           3 :             return false;
    1272             :         }
    1273             :     }
    1274        1089 :     return true;
    1275             : }
    1276             : #endif
    1277             : 
    1278             : /************************************************************************/
    1279             : /*                            SetMetadata()                             */
    1280             : /************************************************************************/
    1281             : 
                       // Forwards the metadata to OGRLayer::SetMetadata() unless pszDomain is
                       // "SHAPEFILE" (as compared by EQUAL()); in that case nothing is stored
                       // and CE_None is returned. A null pszDomain is forwarded as well.
                       // NOTE(review): the reason for ignoring the SHAPEFILE domain is not
                       // visible in this function - confirm before relying on it.
    1282          13 : CPLErr OGRParquetWriterLayer::SetMetadata(CSLConstList papszMetadata,
    1283             :                                           const char *pszDomain)
    1284             : {
    1285          13 :     if (!pszDomain || !EQUAL(pszDomain, "SHAPEFILE"))
    1286             :     {
    1287           8 :         return OGRLayer::SetMetadata(papszMetadata, pszDomain);
    1288             :     }
    1289           5 :     return CE_None;
    1290             : }
    1291             : 
    1292             : /************************************************************************/
    1293             : /*                             GetDataset()                             */
    1294             : /************************************************************************/
    1295             : 
                       // Returns the m_poDataset member.
    1296          26 : GDALDataset *OGRParquetWriterLayer::GetDataset()
    1297             : {
    1298          26 :     return m_poDataset;
    1299             : }

Generated by: LCOV version 1.14