LCOV - code coverage report
Current view: top level - ogr/ogrsf_frmts/parquet - ogrparquetdriver.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 289 329 87.8 %
Date: 2025-01-18 12:42:00 Functions: 11 11 100.0 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  Parquet Translator
       4             :  * Purpose:  Implements OGRParquetDriver.
       5             :  * Author:   Even Rouault, <even.rouault at spatialys.com>
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 2022, Planet Labs
       9             :  *
      10             :  * SPDX-License-Identifier: MIT
      11             :  ****************************************************************************/
      12             : 
      13             : #include "gdal_pam.h"
      14             : #include "ogrsf_frmts.h"
      15             : 
      16             : #include <algorithm>
      17             : #include <map>
      18             : #include <tuple>
      19             : 
      20             : #include "ogr_parquet.h"
      21             : #include "ogrparquetdrivercore.h"
      22             : 
      23             : #include "../arrow_common/ograrrowrandomaccessfile.h"
      24             : #include "../arrow_common/vsiarrowfilesystem.hpp"
      25             : #include "../arrow_common/ograrrowwritablefile.h"
      26             : #include "../arrow_common/ograrrowdataset.hpp"
      27             : #include "../arrow_common/ograrrowlayer.hpp"  // for the destructor
      28             : 
      29             : #ifdef GDAL_USE_ARROWDATASET
      30             : 
      31             : /************************************************************************/
      32             : /*                      OpenFromDatasetFactory()                        */
      33             : /************************************************************************/
      34             : 
                       // Turns an Arrow dataset factory into a GDALDataset holding one layer.
                       //
                       // osBasePath       path the dataset was opened from; its basename becomes
                       //                  the layer name and a "/vsi" prefix sets the bIsVSI flag
                       //                  handed to the layer.
                       // factory          factory that is finished into the Arrow dataset.
                       // papszOpenOptions open options forwarded to the layer constructor.
                       // fs               file system stored on the dataset via SetFileSystem().
                       //
                       // Returns a raw pointer released from a unique_ptr. A failure of
                       // factory->Finish() is routed through PARQUET_ASSIGN_OR_THROW (callers
                       // in this file invoke the Open*() helpers inside try/catch).
      35         273 : static GDALDataset *OpenFromDatasetFactory(
      36             :     const std::string &osBasePath,
      37             :     const std::shared_ptr<arrow::dataset::DatasetFactory> &factory,
      38             :     CSLConstList papszOpenOptions,
      39             :     const std::shared_ptr<arrow::fs::FileSystem> &fs)
      40             : {
      41         273 :     std::shared_ptr<arrow::dataset::Dataset> dataset;
      42         546 :     PARQUET_ASSIGN_OR_THROW(dataset, factory->Finish());
      43             : 
                           // The pool created by CreateDefault() is re-wrapped in a shared_ptr
                           // because the dataset constructor below takes it in that form.
      44             :     auto poMemoryPool = std::shared_ptr<arrow::MemoryPool>(
      45         546 :         arrow::MemoryPool::CreateDefault().release());
      46             : 
      47         273 :     const bool bIsVSI = STARTS_WITH(osBasePath.c_str(), "/vsi");
      48         546 :     auto poDS = std::make_unique<OGRParquetDataset>(poMemoryPool);
                           // The layer receives a non-owning pointer to the dataset it will be
                           // attached to.
      49             :     auto poLayer = std::make_unique<OGRParquetDatasetLayer>(
      50         546 :         poDS.get(), CPLGetBasenameSafe(osBasePath.c_str()).c_str(), bIsVSI,
      51         546 :         dataset, papszOpenOptions);
      52         273 :     poDS->SetLayer(std::move(poLayer));
      53         273 :     poDS->SetFileSystem(fs);
      54         546 :     return poDS.release();
      55             : }
      56             : 
      57             : /************************************************************************/
      58             : /*                         GetFileSystem()                              */
      59             : /************************************************************************/
      60             : 
                       // Picks the Arrow file system used to access osBasePathInOut.
                       //
                       // - When the path starts with "/vsi", or OGR_PARQUET_USE_VSI evaluates to
                       //   true (default "YES"), and VSIStatL() succeeds on the path, a
                       //   VSIArrowFileSystem is created and the path is used unchanged.
                       // - Otherwise a relative path is made absolute against the current
                       //   directory and arrow::fs::FileSystemFromUriOrPath() selects the file
                       //   system and fills in the file-system-relative filename.
                       //
                       // Returns {file system, filename to use with that file system}.
                       // Throws when the current directory cannot be determined, and goes
                       // through PARQUET_ASSIGN_OR_THROW when Arrow rejects the path; the
                       // returned file system is therefore never null.
                       // NOTE(review): despite its name, osBasePathInOut is only read by the
                       // code below.
      61             : static std::tuple<std::shared_ptr<arrow::fs::FileSystem>, std::string>
      62         273 : GetFileSystem(std::string &osBasePathInOut,
      63             :               const std::string &osQueryParameters)
      64             : {
      65             :     // Instantiate file system:
      66             :     // - VSIArrowFileSystem implementation for /vsi files
      67             :     // - base implementation for local files (if OGR_PARQUET_USE_VSI set to NO)
      68         273 :     std::shared_ptr<arrow::fs::FileSystem> fs;
      69         273 :     const bool bIsVSI = STARTS_WITH(osBasePathInOut.c_str(), "/vsi");
      70             :     VSIStatBufL sStat;
      71         546 :     std::string osFSFilename;
      72         459 :     if ((bIsVSI ||
      73         538 :          CPLTestBool(CPLGetConfigOption("OGR_PARQUET_USE_VSI", "YES"))) &&
      74         265 :         VSIStatL(osBasePathInOut.c_str(), &sStat) == 0)
      75             :     {
      76         264 :         osFSFilename = osBasePathInOut;
      77         264 :         fs = std::make_shared<VSIArrowFileSystem>("PARQUET", osQueryParameters);
      78             :     }
      79             :     else
      80             :     {
      81             :         // FileSystemFromUriOrPath() doesn't like relative paths
      82             :         // so transform them to absolute.
      83           9 :         std::string osPath(osBasePathInOut);
      84           9 :         if (CPLIsFilenameRelative(osPath.c_str()))
      85             :         {
      86           8 :             char *pszCurDir = CPLGetCurrentDir();
                                   // Report the failure as an exception instead of handing a
                                   // null file system back to callers that dereference it.
      87           8 :             if (pszCurDir == nullptr)
      88           0 :                 throw parquet::ParquetException("Cannot get current directory");
      89           8 :             osPath = CPLFormFilenameSafe(pszCurDir, osPath.c_str(), nullptr);
      90           8 :             CPLFree(pszCurDir);
      91             :         }
      92           9 :         PARQUET_ASSIGN_OR_THROW(
      93             :             fs, arrow::fs::FileSystemFromUriOrPath(osPath, &osFSFilename));
      94             :     }
      95         273 :     return {fs, osFSFilename};
      96             : }
      97             : 
      98             : /************************************************************************/
      99             : /*                  OpenParquetDatasetWithMetadata()                    */
     100             : /************************************************************************/
     101             : 
                       // Opens a Parquet dataset described by a metadata file located directly
                       // under the base path.
                       //
                       // osBasePathIn      dataset root (copied locally before use).
                       // pszMetadataFile   name of the metadata file, joined to the
                       //                   file-system filename with '/'.
                       // osQueryParameters forwarded to GetFileSystem().
                       // papszOpenOptions  forwarded to OpenFromDatasetFactory().
                       //
                       // Hive partitioning is requested through the factory options. Failures
                       // of ParquetDatasetFactory::Make() go through PARQUET_ASSIGN_OR_THROW;
                       // the caller in this file wraps the call in try/catch.
     102          18 : static GDALDataset *OpenParquetDatasetWithMetadata(
     103             :     const std::string &osBasePathIn, const char *pszMetadataFile,
     104             :     const std::string &osQueryParameters, CSLConstList papszOpenOptions)
     105             : {
     106          36 :     std::string osBasePath(osBasePathIn);
     107          18 :     const auto &[fs, osFSFilename] =
     108          36 :         GetFileSystem(osBasePath, osQueryParameters);
     109             : 
     110          36 :     arrow::dataset::ParquetFactoryOptions options;
     111          36 :     auto partitioningFactory = arrow::dataset::HivePartitioning::MakeFactory();
     112             :     options.partitioning =
     113          18 :         arrow::dataset::PartitioningOrFactory(std::move(partitioningFactory));
     114             : 
     115          18 :     std::shared_ptr<arrow::dataset::DatasetFactory> factory;
     116             :     // coverity[copy_constructor_call]
     117          54 :     PARQUET_ASSIGN_OR_THROW(
     118             :         factory, arrow::dataset::ParquetDatasetFactory::Make(
     119             :                      osFSFilename + '/' + pszMetadataFile, fs,
     120             :                      std::make_shared<arrow::dataset::ParquetFileFormat>(),
     121             :                      std::move(options)));
     122             : 
     123          36 :     return OpenFromDatasetFactory(osBasePath, factory, papszOpenOptions, fs);
     124             : }
     125             : 
     126             : /************************************************************************/
     127             : /*                 OpenParquetDatasetWithoutMetadata()                  */
     128             : /************************************************************************/
     129             : 
                       // Opens a Parquet dataset by inspecting the base path itself, without
                       // relying on a metadata file.
                       //
                       // - If the path designates a regular file, a factory over that single
                       //   file is created.
                       // - Otherwise the path is used as base directory of a recursive file
                       //   selector, with Hive partitioning requested.
                       //
                       // osBasePathIn      file or directory to open (copied locally).
                       // osQueryParameters forwarded to GetFileSystem().
                       // papszOpenOptions  forwarded to OpenFromDatasetFactory().
                       //
                       // Every Arrow failure, including a failing GetFileInfo(), is reported
                       // by throwing, so the caller's try/catch sees it.
     130             : static GDALDataset *
     131         255 : OpenParquetDatasetWithoutMetadata(const std::string &osBasePathIn,
     132             :                                   const std::string &osQueryParameters,
     133             :                                   CSLConstList papszOpenOptions)
     134             : {
     135         510 :     std::string osBasePath(osBasePathIn);
     136         255 :     const auto &[fs, osFSFilename] =
     137         510 :         GetFileSystem(osBasePath, osQueryParameters);
                           // fs is dereferenced just below: refuse a null file system here.
                           if (!fs)
                               throw parquet::ParquetException("Cannot instantiate file system");
     138             : 
     139         510 :     arrow::dataset::FileSystemFactoryOptions options;
     140         255 :     std::shared_ptr<arrow::dataset::DatasetFactory> factory;
     141             : 
                           // Unwrap the Result explicitly: dereferencing a Result that holds an
                           // error status terminates the process instead of raising an exception.
     142         510 :     arrow::fs::FileInfo fileInfo;
                           PARQUET_ASSIGN_OR_THROW(fileInfo, fs->GetFileInfo(osFSFilename));
     143         255 :     if (fileInfo.IsFile())
     144             :     {
                               // osFSFilename is a const binding, so std::move() below copies.
     145             :         // coverity[copy_constructor_call]
     146        1008 :         PARQUET_ASSIGN_OR_THROW(
     147             :             factory, arrow::dataset::FileSystemDatasetFactory::Make(
     148             :                          fs, {std::move(osFSFilename)},
     149             :                          std::make_shared<arrow::dataset::ParquetFileFormat>(),
     150             :                          std::move(options)));
     151             :     }
     152             :     else
     153             :     {
     154             :         auto partitioningFactory =
     155           6 :             arrow::dataset::HivePartitioning::MakeFactory();
     156           6 :         options.partitioning = arrow::dataset::PartitioningOrFactory(
     157           6 :             std::move(partitioningFactory));
     158             : 
     159           6 :         arrow::fs::FileSelector selector;
     160           3 :         selector.base_dir = std::move(osFSFilename);
     161           3 :         selector.recursive = true;
     162             : 
     163             :         // coverity[copy_constructor_call]
     164           6 :         PARQUET_ASSIGN_OR_THROW(
     165             :             factory, arrow::dataset::FileSystemDatasetFactory::Make(
     166             :                          fs, std::move(selector),
     167             :                          std::make_shared<arrow::dataset::ParquetFileFormat>(),
     168             :                          std::move(options)));
     169             :     }
     170             : 
     171         510 :     return OpenFromDatasetFactory(osBasePath, factory, papszOpenOptions, fs);
     172             : }
     173             : 
     174             : #endif
     175             : 
     176             : /************************************************************************/
     177             : /*                  BuildMemDatasetWithRowGroupExtents()                */
     178             : /************************************************************************/
     179             : 
     180             : /** Builds a Memory dataset that contains, for each row-group of the input file,
     181             :  * the feature count and spatial extent of the features of this row group,
     182             :  * using Parquet statistics. This assumes that the Parquet file declares
     183             :  * a "covering":{"bbox":{ ... }} metadata item.
     184             :  *
     185             :  * Only for debug purposes.
     186             :  */
                       // The result has one polygon layer named "footprint" with a single
                       // Integer64 field "feature_count". A row group contributes a feature only
                       // when all four bounds are found and are of type OFTReal.
                       // Returns nullptr when the bbox columns cannot be resolved, when the
                       // "Memory" driver is unavailable, or when dataset/layer creation fails.
                       // Ownership of the returned dataset passes to the caller.
     187           1 : static GDALDataset *BuildMemDatasetWithRowGroupExtents(OGRParquetLayer *poLayer)
     188             : {
     189           1 :     int iParquetXMin = -1;
     190           1 :     int iParquetYMin = -1;
     191           1 :     int iParquetXMax = -1;
     192           1 :     int iParquetYMax = -1;
     193           1 :     if (poLayer->GeomColsBBOXParquet(0, iParquetXMin, iParquetYMin,
     194             :                                      iParquetXMax, iParquetYMax))
     195             :     {
     196           1 :         auto poMemDrv = GetGDALDriverManager()->GetDriverByName("Memory");
     197           1 :         if (!poMemDrv)
     198           0 :             return nullptr;
     199             :         auto poMemDS = std::unique_ptr<GDALDataset>(
     200           2 :             poMemDrv->Create("", 0, 0, 0, GDT_Unknown, nullptr));
     201           1 :         if (!poMemDS)
     202           0 :             return nullptr;
                               // Work on a clone of the source SRS and drop our reference to it
                               // once the layer has been created.
     203           1 :         OGRSpatialReference *poTmpSRS = nullptr;
     204           1 :         const auto poSrcSRS = poLayer->GetSpatialRef();
     205           1 :         if (poSrcSRS)
     206           0 :             poTmpSRS = poSrcSRS->Clone();
     207             :         auto poMemLayer =
     208           1 :             poMemDS->CreateLayer("footprint", poTmpSRS, wkbPolygon, nullptr);
     209           1 :         if (poTmpSRS)
     210           0 :             poTmpSRS->Release();
     211           1 :         if (!poMemLayer)
     212           0 :             return nullptr;
                               // The temporary field definition lives until the end of the
                               // full expression, i.e. for the duration of CreateField().
     213           1 :         poMemLayer->CreateField(
     214           1 :             std::make_unique<OGRFieldDefn>("feature_count", OFTInteger64)
     215           1 :                 .get());
     216             : 
     217             :         const auto metadata =
     218           2 :             poLayer->GetReader()->parquet_reader()->metadata();
     219           1 :         const int numRowGroups = metadata->num_row_groups();
     220          15 :         for (int iRowGroup = 0; iRowGroup < numRowGroups; ++iRowGroup)
     221             :         {
                                   // Scratch outputs required by GetMinMaxForParquetCol() whose
                                   // values are not consumed here.
     222          28 :             std::string osMinTmp, osMaxTmp;
     223             :             OGRField unusedF;
     224             :             bool unusedB;
     225             :             OGRFieldSubType unusedSubType;
     226             : 
     227             :             OGRField sXMin;
     228          14 :             OGR_RawField_SetNull(&sXMin);
     229          14 :             bool bFoundXMin = false;
     230          14 :             OGRFieldType eXMinType = OFTMaxType;
     231             : 
     232             :             OGRField sYMin;
     233          14 :             OGR_RawField_SetNull(&sYMin);
     234          14 :             bool bFoundYMin = false;
     235          14 :             OGRFieldType eYMinType = OFTMaxType;
     236             : 
     237             :             OGRField sXMax;
     238          14 :             OGR_RawField_SetNull(&sXMax);
     239          14 :             bool bFoundXMax = false;
     240          14 :             OGRFieldType eXMaxType = OFTMaxType;
     241             : 
     242             :             OGRField sYMax;
     243          14 :             OGR_RawField_SetNull(&sYMax);
     244          14 :             bool bFoundYMax = false;
     245          14 :             OGRFieldType eYMaxType = OFTMaxType;
     246             : 
                                   // Minimum of the xmin/ymin columns and maximum of the
                                   // xmax/ymax columns. Every call receives two distinct scratch
                                   // strings so that no two output arguments alias each other.
     247          14 :             if (poLayer->GetMinMaxForParquetCol(
     248             :                     iRowGroup, iParquetXMin, nullptr,
     249             :                     /* bComputeMin = */ true, sXMin, bFoundXMin,
     250             :                     /* bComputeMax = */ false, unusedF, unusedB, eXMinType,
     251           8 :                     unusedSubType, osMinTmp, osMaxTmp) &&
     252           8 :                 bFoundXMin && eXMinType == OFTReal &&
     253          22 :                 poLayer->GetMinMaxForParquetCol(
     254             :                     iRowGroup, iParquetYMin, nullptr,
     255             :                     /* bComputeMin = */ true, sYMin, bFoundYMin,
     256             :                     /* bComputeMax = */ false, unusedF, unusedB, eYMinType,
     257           8 :                     unusedSubType, osMinTmp, osMaxTmp) &&
     258           8 :                 bFoundYMin && eYMinType == OFTReal &&
     259          22 :                 poLayer->GetMinMaxForParquetCol(
     260             :                     iRowGroup, iParquetXMax, nullptr,
     261             :                     /* bComputeMin = */ false, unusedF, unusedB,
     262             :                     /* bComputeMax = */ true, sXMax, bFoundXMax, eXMaxType,
     263           8 :                     unusedSubType, osMinTmp, osMaxTmp) &&
     264           8 :                 bFoundXMax && eXMaxType == OFTReal &&
     265          22 :                 poLayer->GetMinMaxForParquetCol(
     266             :                     iRowGroup, iParquetYMax, nullptr,
     267             :                     /* bComputeMin = */ false, unusedF, unusedB,
     268             :                     /* bComputeMax = */ true, sYMax, bFoundYMax, eYMaxType,
     269           8 :                     unusedSubType, osMinTmp, osMaxTmp) &&
     270          22 :                 bFoundYMax && eYMaxType == OFTReal)
     271             :             {
     272          16 :                 OGRFeature oFeat(poMemLayer->GetLayerDefn());
     273           8 :                 oFeat.SetField(0,
     274             :                                static_cast<GIntBig>(
     275           8 :                                    metadata->RowGroup(iRowGroup)->num_rows()));
                                       // Closed rectangular ring over the row-group bounding box.
     276          16 :                 auto poPoly = std::make_unique<OGRPolygon>();
     277           8 :                 auto poLR = std::make_unique<OGRLinearRing>();
     278           8 :                 poLR->addPoint(sXMin.Real, sYMin.Real);
     279           8 :                 poLR->addPoint(sXMin.Real, sYMax.Real);
     280           8 :                 poLR->addPoint(sXMax.Real, sYMax.Real);
     281           8 :                 poLR->addPoint(sXMax.Real, sYMin.Real);
     282           8 :                 poLR->addPoint(sXMin.Real, sYMin.Real);
     283           8 :                 poPoly->addRingDirectly(poLR.release());
     284           8 :                 oFeat.SetGeometryDirectly(poPoly.release());
     285           8 :                 CPL_IGNORE_RET_VAL(poMemLayer->CreateFeature(&oFeat));
     286             :             }
     287             :         }
     288             : 
     289           1 :         return poMemDS.release();
     290             :     }
     291           0 :     return nullptr;
     292             : }
     293             : 
     294             : /************************************************************************/
     295             : /*                                Open()                                */
     296             : /************************************************************************/
     297             : 
                       // Open callback of the driver (read-only).
                       //
                       // 1. When built with GDAL_USE_ARROWDATASET: names prefixed with
                       //    "PARQUET:", directories, and /vsicurl/ names carrying a
                       //    ".parquet?st=" query string are tried through the Arrow dataset
                       //    API, using a "_metadata" file when one exists.
                       // 2. Otherwise, after OGRParquetDriverIdentify() accepts the input, a
                       //    single file is opened with parquet::arrow::OpenFile().
                       //
                       // Returns nullptr for update access, for inputs that are not
                       // identified, for directories not handled by step 1, and on errors
                       // (reported through CPLError()).
     298        1603 : static GDALDataset *OGRParquetDriverOpen(GDALOpenInfo *poOpenInfo)
     299             : {
     300        1603 :     if (poOpenInfo->eAccess == GA_Update)
     301          61 :         return nullptr;
     302             : 
     303             : #ifdef GDAL_USE_ARROWDATASET
     304        3084 :     std::string osBasePath(poOpenInfo->pszFilename);
     305        3084 :     std::string osQueryParameters;
     306             :     const bool bStartedWithParquetPrefix =
     307        1542 :         STARTS_WITH(osBasePath.c_str(), "PARQUET:");
     308             : 
     309        1542 :     if (bStartedWithParquetPrefix)
     310             :     {
     311         261 :         osBasePath = osBasePath.substr(strlen("PARQUET:"));
     312             :     }
     313             : 
     314             :     // Little trick to allow using syntax of
     315             :     // https://github.com/opengeospatial/geoparquet/discussions/101
     316             :     // ogrinfo
     317             :     // "/vsicurl/https://ai4edataeuwest.blob.core.windows.net/us-census/2020/cb_2020_us_vtd_500k.parquet?${SAS_TOKEN}"
     318        1542 :     if (STARTS_WITH(osBasePath.c_str(), "/vsicurl/"))
     319             :     {
                               // Split "<...>.parquet?st=..." into the path proper and the
                               // query string (kept with its leading '?').
     320           2 :         const auto nPos = osBasePath.find(".parquet?st=");
     321           2 :         if (nPos != std::string::npos)
     322             :         {
     323           0 :             osQueryParameters = osBasePath.substr(nPos + strlen(".parquet"));
     324           0 :             osBasePath.resize(nPos + strlen(".parquet"));
     325             :         }
     326             :     }
     327             : 
     328        2390 :     if (bStartedWithParquetPrefix || poOpenInfo->bIsDirectory ||
     329         848 :         !osQueryParameters.empty())
     330             :     {
     331             :         VSIStatBufL sStat;
                               // Drop one trailing slash before joining "_metadata" to the path.
     332         694 :         if (!osBasePath.empty() && osBasePath.back() == '/')
     333           0 :             osBasePath.pop_back();
     334             :         const std::string osMetadataPath =
     335         694 :             CPLFormFilenameSafe(osBasePath.c_str(), "_metadata", nullptr);
                               // OGR_PARQUET_USE_METADATA_FILE (default "YES") allows ignoring
                               // an existing _metadata file.
     336         694 :         if (CPLTestBool(
     337        2082 :                 CPLGetConfigOption("OGR_PARQUET_USE_METADATA_FILE", "YES")) &&
     338        1388 :             VSIStatL((osMetadataPath + osQueryParameters).c_str(), &sStat) == 0)
     339             :         {
     340             :             // If there's a _metadata file, then use it to avoid listing files
     341             :             try
     342             :             {
     343          36 :                 return OpenParquetDatasetWithMetadata(
     344             :                     osBasePath, "_metadata", osQueryParameters,
     345          18 :                     poOpenInfo->papszOpenOptions);
     346             :             }
     347           0 :             catch (const std::exception &e)
     348             :             {
     349           0 :                 CPLError(CE_Failure, CPLE_AppDefined, "Parquet exception: %s",
     350           0 :                          e.what());
     351             :             }
                                   // Only reached after an exception: no fallback is attempted.
     352           0 :             return nullptr;
     353             :         }
     354             :         else
     355             :         {
     356         676 :             bool bLikelyParquetDataset = false;
     357         676 :             if (poOpenInfo->bIsDirectory)
     358             :             {
     359             :                 // Detect if the directory contains .parquet files, or
     360             :                 // subdirectories with a name of the form "key=value", typical
     361             :                 // of HIVE partitioning.
     362         846 :                 const CPLStringList aosFiles(VSIReadDir(osBasePath.c_str()));
     363       22266 :                 for (const char *pszFilename : cpl::Iterate(aosFiles))
     364             :                 {
     365       21845 :                     if (EQUAL(CPLGetExtensionSafe(pszFilename).c_str(),
     366             :                               "parquet"))
     367             :                     {
     368           2 :                         bLikelyParquetDataset = true;
     369           2 :                         break;
     370             :                     }
     371       21843 :                     else if (strchr(pszFilename, '='))
     372             :                     {
     373             :                         // HIVE partitioning
     374           0 :                         if (VSIStatL(CPLFormFilenameSafe(osBasePath.c_str(),
     375             :                                                          pszFilename, nullptr)
     376             :                                          .c_str(),
     377           0 :                                      &sStat) == 0 &&
     378           0 :                             VSI_ISDIR(sStat.st_mode))
     379             :                         {
     380           0 :                             bLikelyParquetDataset = true;
     381           0 :                             break;
     382             :                         }
     383             :                     }
     384             :                 }
     385             :             }
     386             : 
     387         676 :             if (bStartedWithParquetPrefix || bLikelyParquetDataset)
     388             :             {
     389             :                 try
     390             :                 {
     391         510 :                     return OpenParquetDatasetWithoutMetadata(
     392             :                         osBasePath, osQueryParameters,
     393         255 :                         poOpenInfo->papszOpenOptions);
     394             :                 }
     395           0 :                 catch (const std::exception &e)
     396             :                 {
     397             :                     // If we aren't quite sure that the passed file name is
     398             :                     // a directory, then silently continue
                                           // (execution then falls through to the single-file
                                           // code path below).
     399           0 :                     if (poOpenInfo->bIsDirectory)
     400             :                     {
     401           0 :                         CPLError(CE_Failure, CPLE_AppDefined,
     402           0 :                                  "Parquet exception: %s", e.what());
     403           0 :                         return nullptr;
     404             :                     }
     405             :                 }
     406             :             }
     407             :         }
     408             :     }
     409             : #endif
     410             : 
                           // Single-file code path.
     411        1269 :     if (!OGRParquetDriverIdentify(poOpenInfo))
     412             :     {
     413           0 :         return nullptr;
     414             :     }
     415             : 
     416        1269 :     if (poOpenInfo->bIsDirectory)
     417         421 :         return nullptr;
     418             : 
     419        1696 :     std::string osFilename(poOpenInfo->pszFilename);
     420         848 :     if (STARTS_WITH(poOpenInfo->pszFilename, "PARQUET:"))
     421             :     {
     422           0 :         osFilename = poOpenInfo->pszFilename + strlen("PARQUET:");
     423             :     }
     424             : 
     425             :     try
     426             :     {
     427         848 :         std::shared_ptr<arrow::io::RandomAccessFile> infile;
                               // Here OGR_PARQUET_USE_VSI defaults to "NO", whereas
                               // GetFileSystem() and the Create callback default it to "YES".
     428        1312 :         if (STARTS_WITH(osFilename.c_str(), "/vsi") ||
     429         464 :             CPLTestBool(CPLGetConfigOption("OGR_PARQUET_USE_VSI", "NO")))
     430             :         {
                                   // Take over the handle already opened by GDALOpenInfo, if
                                   // any; otherwise open the file ourselves.
     431         384 :             VSIVirtualHandleUniquePtr fp(poOpenInfo->fpL);
     432         384 :             poOpenInfo->fpL = nullptr;
     433         384 :             if (fp == nullptr)
     434             :             {
     435           0 :                 fp.reset(VSIFOpenL(osFilename.c_str(), "rb"));
     436           0 :                 if (fp == nullptr)
     437           0 :                     return nullptr;
     438             :             }
     439         768 :             infile = std::make_shared<OGRArrowRandomAccessFile>(osFilename,
     440         768 :                                                                 std::move(fp));
     441             :         }
     442             :         else
     443             :         {
     444         464 :             PARQUET_ASSIGN_OR_THROW(infile,
     445             :                                     arrow::io::ReadableFile::Open(osFilename));
     446             :         }
     447             : 
     448             :         // Open Parquet file reader
     449         848 :         std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
     450             :         auto poMemoryPool = std::shared_ptr<arrow::MemoryPool>(
     451        1696 :             arrow::MemoryPool::CreateDefault().release());
                               // Two call forms of parquet::arrow::OpenFile() are supported,
                               // selected on the Arrow major version.
     452             : #if ARROW_VERSION_MAJOR >= 19
     453        2544 :         PARQUET_ASSIGN_OR_THROW(
     454             :             arrow_reader,
     455             :             parquet::arrow::OpenFile(std::move(infile), poMemoryPool.get()));
     456             : #else
     457             :         auto st = parquet::arrow::OpenFile(std::move(infile),
     458             :                                            poMemoryPool.get(), &arrow_reader);
     459             :         if (!st.ok())
     460             :         {
     461             :             CPLError(CE_Failure, CPLE_AppDefined,
     462             :                      "parquet::arrow::OpenFile() failed");
     463             :             return nullptr;
     464             :         }
     465             : #endif
     466             : 
     467        1694 :         auto poDS = std::make_unique<OGRParquetDataset>(poMemoryPool);
     468             :         auto poLayer = std::make_unique<OGRParquetLayer>(
     469        1694 :             poDS.get(), CPLGetBasenameSafe(osFilename.c_str()).c_str(),
     470        2541 :             std::move(arrow_reader), poOpenInfo->papszOpenOptions);
     471             : 
     472             :         // For debug purposes: return a layer with the extent of each row group
     473         847 :         if (CPLTestBool(
     474             :                 CPLGetConfigOption("OGR_PARQUET_SHOW_ROW_GROUP_EXTENT", "NO")))
     475             :         {
     476           1 :             return BuildMemDatasetWithRowGroupExtents(poLayer.get());
     477             :         }
     478             : 
     479         846 :         poDS->SetLayer(std::move(poLayer));
     480         846 :         return poDS.release();
     481             :     }
     482           1 :     catch (const std::exception &e)
     483             :     {
     484           1 :         CPLError(CE_Failure, CPLE_AppDefined, "Parquet exception: %s",
     485           1 :                  e.what());
     486           1 :         return nullptr;
     487             :     }
     488             : }
     489             : 
     490             : /************************************************************************/
     491             : /*                               Create()                               */
     492             : /************************************************************************/
     493             : 
                       // Create callback of the driver.
                       //
                       // Only the vector form of the call is accepted: nXSize, nYSize and
                       // nBands must be 0 and eType must be GDT_Unknown, otherwise nullptr is
                       // returned. The creation options argument is unused.
                       //
                       // The output is written through VSI when pszName starts with "/vsi" or
                       // OGR_PARQUET_USE_VSI evaluates to true (default "YES"), and through
                       // arrow::io::FileOutputStream otherwise.
                       //
                       // Returns a new OGRParquetWriterDataset, or nullptr after emitting a
                       // CPLError() when the file cannot be created or an exception is caught.
     494         264 : static GDALDataset *OGRParquetDriverCreate(const char *pszName, int nXSize,
     495             :                                            int nYSize, int nBands,
     496             :                                            GDALDataType eType,
     497             :                                            char ** /* papszOptions */)
     498             : {
     499         264 :     if (!(nXSize == 0 && nYSize == 0 && nBands == 0 && eType == GDT_Unknown))
     500           0 :         return nullptr;
     501             : 
     502             :     try
     503             :     {
     504         264 :         std::shared_ptr<arrow::io::OutputStream> out_file;
     505         342 :         if (STARTS_WITH(pszName, "/vsi") ||
     506          78 :             CPLTestBool(CPLGetConfigOption("OGR_PARQUET_USE_VSI", "YES")))
     507             :         {
     508         264 :             VSILFILE *fp = VSIFOpenL(pszName, "wb");
     509         264 :             if (fp == nullptr)
     510             :             {
     511           1 :                 CPLError(CE_Failure, CPLE_FileIO, "Cannot create %s", pszName);
     512           1 :                 return nullptr;
     513             :             }
                                   // NOTE(review): the handle is presumably owned (and closed)
                                   // by OGRArrowWritableFile from here on - confirm in its
                                   // implementation.
     514         263 :             out_file = std::make_shared<OGRArrowWritableFile>(fp);
     515             :         }
     516             :         else
     517             :         {
     518           0 :             PARQUET_ASSIGN_OR_THROW(out_file,
     519             :                                     arrow::io::FileOutputStream::Open(pszName));
     520             :         }
     521             : 
     522         263 :         return new OGRParquetWriterDataset(out_file);
     523             :     }
     524           0 :     catch (const std::exception &e)
     525             :     {
     526           0 :         CPLError(CE_Failure, CPLE_AppDefined, "Parquet exception: %s",
     527           0 :                  e.what());
     528           0 :         return nullptr;
     529             :     }
     530             : }
     531             : 
     532             : /************************************************************************/
     533             : /*                         OGRParquetDriver()                           */
     534             : /************************************************************************/
     535             : 
     536             : class OGRParquetDriver final : public GDALDriver
     537             : {
     538             :     bool m_bMetadataInitialized = false;
     539             :     void InitMetadata();
     540             : 
     541             :   public:
     542        1756 :     const char *GetMetadataItem(const char *pszName,
     543             :                                 const char *pszDomain) override
     544             :     {
     545        1756 :         if (EQUAL(pszName, GDAL_DS_LAYER_CREATIONOPTIONLIST))
     546             :         {
     547         296 :             InitMetadata();
     548             :         }
     549        1756 :         return GDALDriver::GetMetadataItem(pszName, pszDomain);
     550             :     }
     551             : 
     552          40 :     char **GetMetadata(const char *pszDomain) override
     553             :     {
     554          40 :         InitMetadata();
     555          40 :         return GDALDriver::GetMetadata(pszDomain);
     556             :     }
     557             : };
     558             : 
     559         336 : void OGRParquetDriver::InitMetadata()
     560             : {
     561         336 :     if (m_bMetadataInitialized)
     562         321 :         return;
     563          15 :     m_bMetadataInitialized = true;
     564             : 
     565             :     CPLXMLTreeCloser oTree(
     566          30 :         CPLCreateXMLNode(nullptr, CXT_Element, "LayerCreationOptionList"));
     567             : 
     568          30 :     std::vector<const char *> apszCompressionMethods;
     569          15 :     bool bHasSnappy = false;
     570         105 :     for (const char *pszMethod :
     571         120 :          {"SNAPPY", "GZIP", "BROTLI", "ZSTD", "LZ4_RAW", "LZO", "LZ4_HADOOP"})
     572             :     {
     573             :         auto oResult = arrow::util::Codec::GetCompressionType(
     574         210 :             CPLString(pszMethod).tolower());
     575         105 :         if (oResult.ok() && arrow::util::Codec::IsAvailable(*oResult))
     576             :         {
     577          90 :             if (EQUAL(pszMethod, "SNAPPY"))
     578          15 :                 bHasSnappy = true;
     579          90 :             apszCompressionMethods.emplace_back(pszMethod);
     580             :         }
     581             :     }
     582             : 
     583             :     {
     584          15 :         auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
     585          15 :         CPLAddXMLAttributeAndValue(psOption, "name", "COMPRESSION");
     586          15 :         CPLAddXMLAttributeAndValue(psOption, "type", "string-select");
     587          15 :         CPLAddXMLAttributeAndValue(psOption, "description",
     588             :                                    "Compression method");
     589          15 :         CPLAddXMLAttributeAndValue(psOption, "default",
     590             :                                    bHasSnappy ? "SNAPPY" : "NONE");
     591             :         {
     592          15 :             auto poValueNode = CPLCreateXMLNode(psOption, CXT_Element, "Value");
     593          15 :             CPLAddXMLAttributeAndValue(poValueNode, "alias", "UNCOMPRESSED");
     594          15 :             CPLCreateXMLNode(poValueNode, CXT_Text, "NONE");
     595             :         }
     596         105 :         for (const char *pszMethod : apszCompressionMethods)
     597             :         {
     598          90 :             auto poValueNode = CPLCreateXMLNode(psOption, CXT_Element, "Value");
     599          90 :             CPLCreateXMLNode(poValueNode, CXT_Text, pszMethod);
     600             :         }
     601             :     }
     602             : 
     603             :     {
     604          15 :         auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
     605          15 :         CPLAddXMLAttributeAndValue(psOption, "name", "GEOMETRY_ENCODING");
     606          15 :         CPLAddXMLAttributeAndValue(psOption, "type", "string-select");
     607          15 :         CPLAddXMLAttributeAndValue(psOption, "description",
     608             :                                    "Encoding of geometry columns");
     609          15 :         CPLAddXMLAttributeAndValue(psOption, "default", "WKB");
     610          60 :         for (const char *pszEncoding :
     611          75 :              {"WKB", "WKT", "GEOARROW", "GEOARROW_INTERLEAVED"})
     612             :         {
     613          60 :             auto poValueNode = CPLCreateXMLNode(psOption, CXT_Element, "Value");
     614          60 :             CPLCreateXMLNode(poValueNode, CXT_Text, pszEncoding);
     615          60 :             if (EQUAL(pszEncoding, "GEOARROW"))
     616          15 :                 CPLAddXMLAttributeAndValue(poValueNode, "alias",
     617             :                                            "GEOARROW_STRUCT");
     618             :         }
     619             :     }
     620             : 
     621             :     {
     622          15 :         auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
     623          15 :         CPLAddXMLAttributeAndValue(psOption, "name", "ROW_GROUP_SIZE");
     624          15 :         CPLAddXMLAttributeAndValue(psOption, "type", "integer");
     625          15 :         CPLAddXMLAttributeAndValue(psOption, "description",
     626             :                                    "Maximum number of rows per group");
     627          15 :         CPLAddXMLAttributeAndValue(psOption, "default", "65536");
     628             :     }
     629             : 
     630             :     {
     631          15 :         auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
     632          15 :         CPLAddXMLAttributeAndValue(psOption, "name", "GEOMETRY_NAME");
     633          15 :         CPLAddXMLAttributeAndValue(psOption, "type", "string");
     634          15 :         CPLAddXMLAttributeAndValue(psOption, "description",
     635             :                                    "Name of geometry column");
     636          15 :         CPLAddXMLAttributeAndValue(psOption, "default", "geometry");
     637             :     }
     638             : 
     639             :     {
     640          15 :         auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
     641          15 :         CPLAddXMLAttributeAndValue(psOption, "name", "COORDINATE_PRECISION");
     642          15 :         CPLAddXMLAttributeAndValue(psOption, "type", "float");
     643          15 :         CPLAddXMLAttributeAndValue(psOption, "description",
     644             :                                    "Number of decimals for coordinates (only "
     645             :                                    "for GEOMETRY_ENCODING=WKT)");
     646             :     }
     647             : 
     648             :     {
     649          15 :         auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
     650          15 :         CPLAddXMLAttributeAndValue(psOption, "name", "FID");
     651          15 :         CPLAddXMLAttributeAndValue(psOption, "type", "string");
     652          15 :         CPLAddXMLAttributeAndValue(psOption, "description",
     653             :                                    "Name of the FID column to create");
     654             :     }
     655             : 
     656             :     {
     657          15 :         auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
     658          15 :         CPLAddXMLAttributeAndValue(psOption, "name", "POLYGON_ORIENTATION");
     659          15 :         CPLAddXMLAttributeAndValue(psOption, "type", "string-select");
     660          15 :         CPLAddXMLAttributeAndValue(
     661             :             psOption, "description",
     662             :             "Which ring orientation to use for polygons");
     663          15 :         CPLAddXMLAttributeAndValue(psOption, "default", "COUNTERCLOCKWISE");
     664          15 :         CPLCreateXMLElementAndValue(psOption, "Value", "COUNTERCLOCKWISE");
     665          15 :         CPLCreateXMLElementAndValue(psOption, "Value", "UNMODIFIED");
     666             :     }
     667             : 
     668             :     {
     669          15 :         auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
     670          15 :         CPLAddXMLAttributeAndValue(psOption, "name", "EDGES");
     671          15 :         CPLAddXMLAttributeAndValue(psOption, "type", "string-select");
     672          15 :         CPLAddXMLAttributeAndValue(
     673             :             psOption, "description",
     674             :             "Name of the coordinate system for the edges");
     675          15 :         CPLAddXMLAttributeAndValue(psOption, "default", "PLANAR");
     676          15 :         CPLCreateXMLElementAndValue(psOption, "Value", "PLANAR");
     677          15 :         CPLCreateXMLElementAndValue(psOption, "Value", "SPHERICAL");
     678             :     }
     679             : 
     680             :     {
     681          15 :         auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
     682          15 :         CPLAddXMLAttributeAndValue(psOption, "name", "CREATOR");
     683          15 :         CPLAddXMLAttributeAndValue(psOption, "type", "string");
     684          15 :         CPLAddXMLAttributeAndValue(psOption, "description",
     685             :                                    "Name of creating application");
     686             :     }
     687             : 
     688             :     {
     689          15 :         auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
     690          15 :         CPLAddXMLAttributeAndValue(psOption, "name", "WRITE_COVERING_BBOX");
     691          15 :         CPLAddXMLAttributeAndValue(psOption, "type", "boolean");
     692          15 :         CPLAddXMLAttributeAndValue(psOption, "default", "YES");
     693          15 :         CPLAddXMLAttributeAndValue(psOption, "description",
     694             :                                    "Whether to write xmin/ymin/xmax/ymax "
     695             :                                    "columns with the bounding box of "
     696             :                                    "geometries");
     697             :     }
     698             : 
     699             :     {
     700          15 :         auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
     701          15 :         CPLAddXMLAttributeAndValue(psOption, "name", "SORT_BY_BBOX");
     702          15 :         CPLAddXMLAttributeAndValue(psOption, "type", "boolean");
     703          15 :         CPLAddXMLAttributeAndValue(psOption, "default", "NO");
     704          15 :         CPLAddXMLAttributeAndValue(psOption, "description",
     705             :                                    "Whether features should be sorted based on "
     706             :                                    "the bounding box of their geometries");
     707             :     }
     708             : 
     709          15 :     char *pszXML = CPLSerializeXMLTree(oTree.get());
     710          15 :     GDALDriver::SetMetadataItem(GDAL_DS_LAYER_CREATIONOPTIONLIST, pszXML);
     711          15 :     CPLFree(pszXML);
     712             : }
     713             : 
     714             : /************************************************************************/
     715             : /*                         RegisterOGRParquet()                         */
     716             : /************************************************************************/
     717             : 
     718          30 : void RegisterOGRParquet()
     719             : {
     720          30 :     if (GDALGetDriverByName(DRIVER_NAME) != nullptr)
     721           0 :         return;
     722             : 
     723          60 :     auto poDriver = std::make_unique<OGRParquetDriver>();
     724          30 :     OGRParquetDriverSetCommonMetadata(poDriver.get());
     725             : 
     726          30 :     poDriver->pfnOpen = OGRParquetDriverOpen;
     727          30 :     poDriver->pfnCreate = OGRParquetDriverCreate;
     728             : 
     729          30 :     poDriver->SetMetadataItem("ARROW_VERSION", ARROW_VERSION_STRING);
     730             : #ifdef GDAL_USE_ARROWDATASET
     731          30 :     poDriver->SetMetadataItem("ARROW_DATASET", "YES");
     732             : #endif
     733             : 
     734          30 :     GetGDALDriverManager()->RegisterDriver(poDriver.release());
     735             : 
     736             : #if ARROW_VERSION_MAJOR >= 16
     737             :     // Mostly for tests
     738             :     const char *pszPath =
     739          30 :         CPLGetConfigOption("OGR_PARQUET_LOAD_FILE_SYSTEM_FACTORIES", nullptr);
     740          30 :     if (pszPath)
     741             :     {
     742           0 :         auto result = arrow::fs::LoadFileSystemFactories(pszPath);
     743           0 :         if (!result.ok())
     744             :         {
     745           0 :             CPLError(CE_Warning, CPLE_AppDefined,
     746             :                      "arrow::fs::LoadFileSystemFactories() failed with %s",
     747           0 :                      result.message().c_str());
     748             :         }
     749             :     }
     750             : #endif
     751             : }

Generated by: LCOV version 1.14