Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: Parquet Translator
4 : * Purpose: Implements OGRParquetDriver.
5 : * Author: Even Rouault, <even.rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022, Planet Labs
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #include "gdal_pam.h"
14 : #include "ogrsf_frmts.h"
15 :
16 : #include <algorithm>
17 : #include <map>
18 : #include <tuple>
19 :
20 : #include "ogr_parquet.h"
21 : #include "ogrparquetdrivercore.h"
22 :
23 : #include "../arrow_common/ograrrowrandomaccessfile.h"
24 : #include "../arrow_common/vsiarrowfilesystem.hpp"
25 : #include "../arrow_common/ograrrowwritablefile.h"
26 : #include "../arrow_common/ograrrowdataset.hpp"
27 : #include "../arrow_common/ograrrowlayer.hpp" // for the destructor
28 :
29 : #ifdef GDAL_USE_ARROWDATASET
30 :
31 : /************************************************************************/
32 : /* OpenFromDatasetFactory() */
33 : /************************************************************************/
34 :
35 272 : static GDALDataset *OpenFromDatasetFactory(
36 : const std::string &osBasePath,
37 : const std::shared_ptr<arrow::dataset::DatasetFactory> &factory,
38 : CSLConstList papszOpenOptions,
39 : const std::shared_ptr<arrow::fs::FileSystem> &fs)
40 : {
41 272 : std::shared_ptr<arrow::dataset::Dataset> dataset;
42 544 : PARQUET_ASSIGN_OR_THROW(dataset, factory->Finish());
43 :
44 : auto poMemoryPool = std::shared_ptr<arrow::MemoryPool>(
45 544 : arrow::MemoryPool::CreateDefault().release());
46 :
47 272 : const bool bIsVSI = STARTS_WITH(osBasePath.c_str(), "/vsi");
48 544 : auto poDS = std::make_unique<OGRParquetDataset>(poMemoryPool);
49 : auto poLayer = std::make_unique<OGRParquetDatasetLayer>(
50 272 : poDS.get(), CPLGetBasename(osBasePath.c_str()), bIsVSI, dataset,
51 544 : papszOpenOptions);
52 272 : poDS->SetLayer(std::move(poLayer));
53 272 : poDS->SetFileSystem(fs);
54 544 : return poDS.release();
55 : }
56 :
57 : /************************************************************************/
58 : /* GetFileSystem() */
59 : /************************************************************************/
60 :
61 : static std::tuple<std::shared_ptr<arrow::fs::FileSystem>, std::string>
62 272 : GetFileSystem(std::string &osBasePathInOut,
63 : const std::string &osQueryParameters)
64 : {
65 : // Instantiate file system:
66 : // - VSIArrowFileSystem implementation for /vsi files
67 : // - base implementation for local files (if OGR_PARQUET_USE_VSI set to NO)
68 272 : std::shared_ptr<arrow::fs::FileSystem> fs;
69 272 : const bool bIsVSI = STARTS_WITH(osBasePathInOut.c_str(), "/vsi");
70 : VSIStatBufL sStat;
71 544 : std::string osFSFilename;
72 458 : if ((bIsVSI ||
73 536 : CPLTestBool(CPLGetConfigOption("OGR_PARQUET_USE_VSI", "YES"))) &&
74 264 : VSIStatL(osBasePathInOut.c_str(), &sStat) == 0)
75 : {
76 263 : osFSFilename = osBasePathInOut;
77 263 : fs = std::make_shared<VSIArrowFileSystem>("PARQUET", osQueryParameters);
78 : }
79 : else
80 : {
81 : // FileSystemFromUriOrPath() doesn't like relative paths
82 : // so transform them to absolute.
83 9 : std::string osPath(osBasePathInOut);
84 9 : if (CPLIsFilenameRelative(osPath.c_str()))
85 : {
86 8 : char *pszCurDir = CPLGetCurrentDir();
87 8 : if (pszCurDir == nullptr)
88 0 : return {nullptr, osFSFilename};
89 8 : osPath = CPLFormFilename(pszCurDir, osPath.c_str(), nullptr);
90 8 : CPLFree(pszCurDir);
91 : }
92 9 : PARQUET_ASSIGN_OR_THROW(
93 : fs, arrow::fs::FileSystemFromUriOrPath(osPath, &osFSFilename));
94 : }
95 272 : return {fs, osFSFilename};
96 : }
97 :
98 : /************************************************************************/
99 : /* OpenParquetDatasetWithMetadata() */
100 : /************************************************************************/
101 :
102 18 : static GDALDataset *OpenParquetDatasetWithMetadata(
103 : const std::string &osBasePathIn, const char *pszMetadataFile,
104 : const std::string &osQueryParameters, CSLConstList papszOpenOptions)
105 : {
106 36 : std::string osBasePath(osBasePathIn);
107 18 : const auto &[fs, osFSFilename] =
108 36 : GetFileSystem(osBasePath, osQueryParameters);
109 :
110 36 : arrow::dataset::ParquetFactoryOptions options;
111 36 : auto partitioningFactory = arrow::dataset::HivePartitioning::MakeFactory();
112 : options.partitioning =
113 18 : arrow::dataset::PartitioningOrFactory(std::move(partitioningFactory));
114 :
115 18 : std::shared_ptr<arrow::dataset::DatasetFactory> factory;
116 : // coverity[copy_constructor_call]
117 54 : PARQUET_ASSIGN_OR_THROW(
118 : factory, arrow::dataset::ParquetDatasetFactory::Make(
119 : osFSFilename + '/' + pszMetadataFile, fs,
120 : std::make_shared<arrow::dataset::ParquetFileFormat>(),
121 : std::move(options)));
122 :
123 36 : return OpenFromDatasetFactory(osBasePath, factory, papszOpenOptions, fs);
124 : }
125 :
126 : /************************************************************************/
127 : /* OpenParquetDatasetWithoutMetadata() */
128 : /************************************************************************/
129 :
130 : static GDALDataset *
131 254 : OpenParquetDatasetWithoutMetadata(const std::string &osBasePathIn,
132 : const std::string &osQueryParameters,
133 : CSLConstList papszOpenOptions)
134 : {
135 508 : std::string osBasePath(osBasePathIn);
136 254 : const auto &[fs, osFSFilename] =
137 508 : GetFileSystem(osBasePath, osQueryParameters);
138 :
139 508 : arrow::dataset::FileSystemFactoryOptions options;
140 254 : std::shared_ptr<arrow::dataset::DatasetFactory> factory;
141 :
142 508 : const auto fileInfo = fs->GetFileInfo(osFSFilename);
143 254 : if (fileInfo->IsFile())
144 : {
145 : // coverity[copy_constructor_call]
146 1008 : PARQUET_ASSIGN_OR_THROW(
147 : factory, arrow::dataset::FileSystemDatasetFactory::Make(
148 : fs, {std::move(osFSFilename)},
149 : std::make_shared<arrow::dataset::ParquetFileFormat>(),
150 : std::move(options)));
151 : }
152 : else
153 : {
154 : auto partitioningFactory =
155 4 : arrow::dataset::HivePartitioning::MakeFactory();
156 4 : options.partitioning = arrow::dataset::PartitioningOrFactory(
157 4 : std::move(partitioningFactory));
158 :
159 4 : arrow::fs::FileSelector selector;
160 2 : selector.base_dir = std::move(osFSFilename);
161 2 : selector.recursive = true;
162 :
163 : // coverity[copy_constructor_call]
164 4 : PARQUET_ASSIGN_OR_THROW(
165 : factory, arrow::dataset::FileSystemDatasetFactory::Make(
166 : fs, std::move(selector),
167 : std::make_shared<arrow::dataset::ParquetFileFormat>(),
168 : std::move(options)));
169 : }
170 :
171 508 : return OpenFromDatasetFactory(osBasePath, factory, papszOpenOptions, fs);
172 : }
173 :
174 : #endif
175 :
176 : /************************************************************************/
177 : /* BuildMemDatasetWithRowGroupExtents() */
178 : /************************************************************************/
179 :
180 : /** Builds a Memory dataset that contains, for each row-group of the input file,
181 : * the feature count and spatial extent of the features of this row group,
182 : * using Parquet statistics. This assumes that the Parquet file declares
183 : * a "covering":{"bbox":{ ... }} metadata item.
184 : *
185 : * Only for debug purposes.
186 : */
187 1 : static GDALDataset *BuildMemDatasetWithRowGroupExtents(OGRParquetLayer *poLayer)
188 : {
189 1 : int iParquetXMin = -1;
190 1 : int iParquetYMin = -1;
191 1 : int iParquetXMax = -1;
192 1 : int iParquetYMax = -1;
193 1 : if (poLayer->GeomColsBBOXParquet(0, iParquetXMin, iParquetYMin,
194 : iParquetXMax, iParquetYMax))
195 : {
196 1 : auto poMemDrv = GetGDALDriverManager()->GetDriverByName("Memory");
197 1 : if (!poMemDrv)
198 0 : return nullptr;
199 : auto poMemDS = std::unique_ptr<GDALDataset>(
200 2 : poMemDrv->Create("", 0, 0, 0, GDT_Unknown, nullptr));
201 1 : if (!poMemDS)
202 0 : return nullptr;
203 1 : OGRSpatialReference *poTmpSRS = nullptr;
204 1 : const auto poSrcSRS = poLayer->GetSpatialRef();
205 1 : if (poSrcSRS)
206 0 : poTmpSRS = poSrcSRS->Clone();
207 : auto poMemLayer =
208 1 : poMemDS->CreateLayer("footprint", poTmpSRS, wkbPolygon, nullptr);
209 1 : if (poTmpSRS)
210 0 : poTmpSRS->Release();
211 1 : if (!poMemLayer)
212 0 : return nullptr;
213 1 : poMemLayer->CreateField(
214 1 : std::make_unique<OGRFieldDefn>("feature_count", OFTInteger64)
215 1 : .get());
216 :
217 : const auto metadata =
218 2 : poLayer->GetReader()->parquet_reader()->metadata();
219 1 : const int numRowGroups = metadata->num_row_groups();
220 15 : for (int iRowGroup = 0; iRowGroup < numRowGroups; ++iRowGroup)
221 : {
222 28 : std::string osMinTmp, osMaxTmp;
223 : OGRField unusedF;
224 : bool unusedB;
225 : OGRFieldSubType unusedSubType;
226 :
227 : OGRField sXMin;
228 14 : OGR_RawField_SetNull(&sXMin);
229 14 : bool bFoundXMin = false;
230 14 : OGRFieldType eXMinType = OFTMaxType;
231 :
232 : OGRField sYMin;
233 14 : OGR_RawField_SetNull(&sYMin);
234 14 : bool bFoundYMin = false;
235 14 : OGRFieldType eYMinType = OFTMaxType;
236 :
237 : OGRField sXMax;
238 14 : OGR_RawField_SetNull(&sXMax);
239 14 : bool bFoundXMax = false;
240 14 : OGRFieldType eXMaxType = OFTMaxType;
241 :
242 : OGRField sYMax;
243 14 : OGR_RawField_SetNull(&sYMax);
244 14 : bool bFoundYMax = false;
245 14 : OGRFieldType eYMaxType = OFTMaxType;
246 :
247 14 : if (poLayer->GetMinMaxForParquetCol(
248 : iRowGroup, iParquetXMin, nullptr,
249 : /* bComputeMin = */ true, sXMin, bFoundXMin,
250 : /* bComputeMax = */ false, unusedF, unusedB, eXMinType,
251 8 : unusedSubType, osMinTmp, osMaxTmp) &&
252 8 : bFoundXMin && eXMinType == OFTReal &&
253 22 : poLayer->GetMinMaxForParquetCol(
254 : iRowGroup, iParquetYMin, nullptr,
255 : /* bComputeMin = */ true, sYMin, bFoundYMin,
256 : /* bComputeMax = */ false, unusedF, unusedB, eYMinType,
257 8 : unusedSubType, osMinTmp, osMaxTmp) &&
258 8 : bFoundYMin && eYMinType == OFTReal &&
259 22 : poLayer->GetMinMaxForParquetCol(
260 : iRowGroup, iParquetXMax, nullptr,
261 : /* bComputeMin = */ false, unusedF, unusedB,
262 : /* bComputeMax = */ true, sXMax, bFoundXMax, eXMaxType,
263 8 : unusedSubType, osMaxTmp, osMaxTmp) &&
264 8 : bFoundXMax && eXMaxType == OFTReal &&
265 22 : poLayer->GetMinMaxForParquetCol(
266 : iRowGroup, iParquetYMax, nullptr,
267 : /* bComputeMin = */ false, unusedF, unusedB,
268 : /* bComputeMax = */ true, sYMax, bFoundYMax, eYMaxType,
269 8 : unusedSubType, osMaxTmp, osMaxTmp) &&
270 22 : bFoundYMax && eYMaxType == OFTReal)
271 : {
272 16 : OGRFeature oFeat(poMemLayer->GetLayerDefn());
273 8 : oFeat.SetField(0,
274 : static_cast<GIntBig>(
275 8 : metadata->RowGroup(iRowGroup)->num_rows()));
276 16 : auto poPoly = std::make_unique<OGRPolygon>();
277 8 : auto poLR = std::make_unique<OGRLinearRing>();
278 8 : poLR->addPoint(sXMin.Real, sYMin.Real);
279 8 : poLR->addPoint(sXMin.Real, sYMax.Real);
280 8 : poLR->addPoint(sXMax.Real, sYMax.Real);
281 8 : poLR->addPoint(sXMax.Real, sYMin.Real);
282 8 : poLR->addPoint(sXMin.Real, sYMin.Real);
283 8 : poPoly->addRingDirectly(poLR.release());
284 8 : oFeat.SetGeometryDirectly(poPoly.release());
285 8 : CPL_IGNORE_RET_VAL(poMemLayer->CreateFeature(&oFeat));
286 : }
287 : }
288 :
289 1 : return poMemDS.release();
290 : }
291 0 : return nullptr;
292 : }
293 :
294 : /************************************************************************/
295 : /* Open() */
296 : /************************************************************************/
297 :
298 1600 : static GDALDataset *OGRParquetDriverOpen(GDALOpenInfo *poOpenInfo)
299 : {
300 1600 : if (poOpenInfo->eAccess == GA_Update)
301 61 : return nullptr;
302 :
303 : #ifdef GDAL_USE_ARROWDATASET
304 3078 : std::string osBasePath(poOpenInfo->pszFilename);
305 3078 : std::string osQueryParameters;
306 : const bool bStartedWithParquetPrefix =
307 1539 : STARTS_WITH(osBasePath.c_str(), "PARQUET:");
308 :
309 1539 : if (bStartedWithParquetPrefix)
310 : {
311 260 : osBasePath = osBasePath.substr(strlen("PARQUET:"));
312 : }
313 :
314 : // Little trick to allow using syntax of
315 : // https://github.com/opengeospatial/geoparquet/discussions/101
316 : // ogrinfo
317 : // "/vsicurl/https://ai4edataeuwest.blob.core.windows.net/us-census/2020/cb_2020_us_vtd_500k.parquet?${SAS_TOKEN}"
318 1539 : if (STARTS_WITH(osBasePath.c_str(), "/vsicurl/"))
319 : {
320 1 : const auto nPos = osBasePath.find(".parquet?st=");
321 1 : if (nPos != std::string::npos)
322 : {
323 0 : osQueryParameters = osBasePath.substr(nPos + strlen(".parquet"));
324 0 : osBasePath.resize(nPos + strlen(".parquet"));
325 : }
326 : }
327 :
328 2386 : if (bStartedWithParquetPrefix || poOpenInfo->bIsDirectory ||
329 847 : !osQueryParameters.empty())
330 : {
331 : VSIStatBufL sStat;
332 692 : if (!osBasePath.empty() && osBasePath.back() == '/')
333 0 : osBasePath.pop_back();
334 : std::string osMetadataPath =
335 692 : CPLFormFilename(osBasePath.c_str(), "_metadata", nullptr);
336 692 : if (CPLTestBool(
337 2076 : CPLGetConfigOption("OGR_PARQUET_USE_METADATA_FILE", "YES")) &&
338 1384 : VSIStatL((osMetadataPath + osQueryParameters).c_str(), &sStat) == 0)
339 : {
340 : // If there's a _metadata file, then use it to avoid listing files
341 : try
342 : {
343 36 : return OpenParquetDatasetWithMetadata(
344 : osBasePath, "_metadata", osQueryParameters,
345 18 : poOpenInfo->papszOpenOptions);
346 : }
347 0 : catch (const std::exception &e)
348 : {
349 0 : CPLError(CE_Failure, CPLE_AppDefined, "Parquet exception: %s",
350 0 : e.what());
351 : }
352 0 : return nullptr;
353 : }
354 : else
355 : {
356 674 : bool bLikelyParquetDataset = false;
357 674 : if (poOpenInfo->bIsDirectory)
358 : {
359 : // Detect if the directory contains .parquet files, or
360 : // subdirectories with a name of the form "key=value", typical
361 : // of HIVE partitioning.
362 844 : const CPLStringList aosFiles(VSIReadDir(osBasePath.c_str()));
363 22244 : for (const char *pszFilename : cpl::Iterate(aosFiles))
364 : {
365 21824 : if (EQUAL(CPLGetExtension(pszFilename), "parquet"))
366 : {
367 2 : bLikelyParquetDataset = true;
368 2 : break;
369 : }
370 21822 : else if (strchr(pszFilename, '='))
371 : {
372 : // HIVE partitioning
373 0 : if (VSIStatL(CPLFormFilename(osBasePath.c_str(),
374 : pszFilename, nullptr),
375 0 : &sStat) == 0 &&
376 0 : VSI_ISDIR(sStat.st_mode))
377 : {
378 0 : bLikelyParquetDataset = true;
379 0 : break;
380 : }
381 : }
382 : }
383 : }
384 :
385 674 : if (bStartedWithParquetPrefix || bLikelyParquetDataset)
386 : {
387 : try
388 : {
389 508 : return OpenParquetDatasetWithoutMetadata(
390 : osBasePath, osQueryParameters,
391 254 : poOpenInfo->papszOpenOptions);
392 : }
393 0 : catch (const std::exception &e)
394 : {
395 : // If we aren't quite sure that the passed file name is
396 : // a directory, then silently continue
397 0 : if (poOpenInfo->bIsDirectory)
398 : {
399 0 : CPLError(CE_Failure, CPLE_AppDefined,
400 0 : "Parquet exception: %s", e.what());
401 0 : return nullptr;
402 : }
403 : }
404 : }
405 : }
406 : }
407 : #endif
408 :
409 1267 : if (!OGRParquetDriverIdentify(poOpenInfo))
410 : {
411 0 : return nullptr;
412 : }
413 :
414 1267 : if (poOpenInfo->bIsDirectory)
415 420 : return nullptr;
416 :
417 1694 : std::string osFilename(poOpenInfo->pszFilename);
418 847 : if (STARTS_WITH(poOpenInfo->pszFilename, "PARQUET:"))
419 : {
420 0 : osFilename = poOpenInfo->pszFilename + strlen("PARQUET:");
421 : }
422 :
423 : try
424 : {
425 847 : std::shared_ptr<arrow::io::RandomAccessFile> infile;
426 1310 : if (STARTS_WITH(osFilename.c_str(), "/vsi") ||
427 463 : CPLTestBool(CPLGetConfigOption("OGR_PARQUET_USE_VSI", "NO")))
428 : {
429 384 : VSIVirtualHandleUniquePtr fp(poOpenInfo->fpL);
430 384 : poOpenInfo->fpL = nullptr;
431 384 : if (fp == nullptr)
432 : {
433 0 : fp.reset(VSIFOpenL(osFilename.c_str(), "rb"));
434 0 : if (fp == nullptr)
435 0 : return nullptr;
436 : }
437 768 : infile = std::make_shared<OGRArrowRandomAccessFile>(osFilename,
438 768 : std::move(fp));
439 : }
440 : else
441 : {
442 463 : PARQUET_ASSIGN_OR_THROW(infile,
443 : arrow::io::ReadableFile::Open(osFilename));
444 : }
445 :
446 : // Open Parquet file reader
447 847 : std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
448 : auto poMemoryPool = std::shared_ptr<arrow::MemoryPool>(
449 1694 : arrow::MemoryPool::CreateDefault().release());
450 847 : auto st = parquet::arrow::OpenFile(std::move(infile),
451 2541 : poMemoryPool.get(), &arrow_reader);
452 847 : if (!st.ok())
453 : {
454 1 : CPLError(CE_Failure, CPLE_AppDefined,
455 : "parquet::arrow::OpenFile() failed");
456 1 : return nullptr;
457 : }
458 :
459 1692 : auto poDS = std::make_unique<OGRParquetDataset>(poMemoryPool);
460 : auto poLayer = std::make_unique<OGRParquetLayer>(
461 846 : poDS.get(), CPLGetBasename(osFilename.c_str()),
462 2538 : std::move(arrow_reader), poOpenInfo->papszOpenOptions);
463 :
464 : // For debug purposes: return a layer with the extent of each row group
465 846 : if (CPLTestBool(
466 : CPLGetConfigOption("OGR_PARQUET_SHOW_ROW_GROUP_EXTENT", "NO")))
467 : {
468 1 : return BuildMemDatasetWithRowGroupExtents(poLayer.get());
469 : }
470 :
471 845 : poDS->SetLayer(std::move(poLayer));
472 845 : return poDS.release();
473 : }
474 0 : catch (const std::exception &e)
475 : {
476 0 : CPLError(CE_Failure, CPLE_AppDefined, "Parquet exception: %s",
477 0 : e.what());
478 0 : return nullptr;
479 : }
480 : }
481 :
482 : /************************************************************************/
483 : /* Create() */
484 : /************************************************************************/
485 :
486 264 : static GDALDataset *OGRParquetDriverCreate(const char *pszName, int nXSize,
487 : int nYSize, int nBands,
488 : GDALDataType eType,
489 : char ** /* papszOptions */)
490 : {
491 264 : if (!(nXSize == 0 && nYSize == 0 && nBands == 0 && eType == GDT_Unknown))
492 0 : return nullptr;
493 :
494 : try
495 : {
496 264 : std::shared_ptr<arrow::io::OutputStream> out_file;
497 342 : if (STARTS_WITH(pszName, "/vsi") ||
498 78 : CPLTestBool(CPLGetConfigOption("OGR_PARQUET_USE_VSI", "YES")))
499 : {
500 264 : VSILFILE *fp = VSIFOpenL(pszName, "wb");
501 264 : if (fp == nullptr)
502 : {
503 1 : CPLError(CE_Failure, CPLE_FileIO, "Cannot create %s", pszName);
504 1 : return nullptr;
505 : }
506 263 : out_file = std::make_shared<OGRArrowWritableFile>(fp);
507 : }
508 : else
509 : {
510 0 : PARQUET_ASSIGN_OR_THROW(out_file,
511 : arrow::io::FileOutputStream::Open(pszName));
512 : }
513 :
514 263 : return new OGRParquetWriterDataset(out_file);
515 : }
516 0 : catch (const std::exception &e)
517 : {
518 0 : CPLError(CE_Failure, CPLE_AppDefined, "Parquet exception: %s",
519 0 : e.what());
520 0 : return nullptr;
521 : }
522 : }
523 :
524 : /************************************************************************/
525 : /* OGRParquetDriver() */
526 : /************************************************************************/
527 :
528 : class OGRParquetDriver final : public GDALDriver
529 : {
530 : bool m_bMetadataInitialized = false;
531 : void InitMetadata();
532 :
533 : public:
534 1745 : const char *GetMetadataItem(const char *pszName,
535 : const char *pszDomain) override
536 : {
537 1745 : if (EQUAL(pszName, GDAL_DS_LAYER_CREATIONOPTIONLIST))
538 : {
539 296 : InitMetadata();
540 : }
541 1745 : return GDALDriver::GetMetadataItem(pszName, pszDomain);
542 : }
543 :
544 40 : char **GetMetadata(const char *pszDomain) override
545 : {
546 40 : InitMetadata();
547 40 : return GDALDriver::GetMetadata(pszDomain);
548 : }
549 : };
550 :
551 336 : void OGRParquetDriver::InitMetadata()
552 : {
553 336 : if (m_bMetadataInitialized)
554 321 : return;
555 15 : m_bMetadataInitialized = true;
556 :
557 : CPLXMLTreeCloser oTree(
558 30 : CPLCreateXMLNode(nullptr, CXT_Element, "LayerCreationOptionList"));
559 :
560 30 : std::vector<const char *> apszCompressionMethods;
561 15 : bool bHasSnappy = false;
562 105 : for (const char *pszMethod :
563 120 : {"SNAPPY", "GZIP", "BROTLI", "ZSTD", "LZ4_RAW", "LZO", "LZ4_HADOOP"})
564 : {
565 : auto oResult = arrow::util::Codec::GetCompressionType(
566 210 : CPLString(pszMethod).tolower());
567 105 : if (oResult.ok() && arrow::util::Codec::IsAvailable(*oResult))
568 : {
569 90 : if (EQUAL(pszMethod, "SNAPPY"))
570 15 : bHasSnappy = true;
571 90 : apszCompressionMethods.emplace_back(pszMethod);
572 : }
573 : }
574 :
575 : {
576 15 : auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
577 15 : CPLAddXMLAttributeAndValue(psOption, "name", "COMPRESSION");
578 15 : CPLAddXMLAttributeAndValue(psOption, "type", "string-select");
579 15 : CPLAddXMLAttributeAndValue(psOption, "description",
580 : "Compression method");
581 15 : CPLAddXMLAttributeAndValue(psOption, "default",
582 : bHasSnappy ? "SNAPPY" : "NONE");
583 : {
584 15 : auto poValueNode = CPLCreateXMLNode(psOption, CXT_Element, "Value");
585 15 : CPLAddXMLAttributeAndValue(poValueNode, "alias", "UNCOMPRESSED");
586 15 : CPLCreateXMLNode(poValueNode, CXT_Text, "NONE");
587 : }
588 105 : for (const char *pszMethod : apszCompressionMethods)
589 : {
590 90 : auto poValueNode = CPLCreateXMLNode(psOption, CXT_Element, "Value");
591 90 : CPLCreateXMLNode(poValueNode, CXT_Text, pszMethod);
592 : }
593 : }
594 :
595 : {
596 15 : auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
597 15 : CPLAddXMLAttributeAndValue(psOption, "name", "GEOMETRY_ENCODING");
598 15 : CPLAddXMLAttributeAndValue(psOption, "type", "string-select");
599 15 : CPLAddXMLAttributeAndValue(psOption, "description",
600 : "Encoding of geometry columns");
601 15 : CPLAddXMLAttributeAndValue(psOption, "default", "WKB");
602 60 : for (const char *pszEncoding :
603 75 : {"WKB", "WKT", "GEOARROW", "GEOARROW_INTERLEAVED"})
604 : {
605 60 : auto poValueNode = CPLCreateXMLNode(psOption, CXT_Element, "Value");
606 60 : CPLCreateXMLNode(poValueNode, CXT_Text, pszEncoding);
607 60 : if (EQUAL(pszEncoding, "GEOARROW"))
608 15 : CPLAddXMLAttributeAndValue(poValueNode, "alias",
609 : "GEOARROW_STRUCT");
610 : }
611 : }
612 :
613 : {
614 15 : auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
615 15 : CPLAddXMLAttributeAndValue(psOption, "name", "ROW_GROUP_SIZE");
616 15 : CPLAddXMLAttributeAndValue(psOption, "type", "integer");
617 15 : CPLAddXMLAttributeAndValue(psOption, "description",
618 : "Maximum number of rows per group");
619 15 : CPLAddXMLAttributeAndValue(psOption, "default", "65536");
620 : }
621 :
622 : {
623 15 : auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
624 15 : CPLAddXMLAttributeAndValue(psOption, "name", "GEOMETRY_NAME");
625 15 : CPLAddXMLAttributeAndValue(psOption, "type", "string");
626 15 : CPLAddXMLAttributeAndValue(psOption, "description",
627 : "Name of geometry column");
628 15 : CPLAddXMLAttributeAndValue(psOption, "default", "geometry");
629 : }
630 :
631 : {
632 15 : auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
633 15 : CPLAddXMLAttributeAndValue(psOption, "name", "COORDINATE_PRECISION");
634 15 : CPLAddXMLAttributeAndValue(psOption, "type", "float");
635 15 : CPLAddXMLAttributeAndValue(psOption, "description",
636 : "Number of decimals for coordinates (only "
637 : "for GEOMETRY_ENCODING=WKT)");
638 : }
639 :
640 : {
641 15 : auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
642 15 : CPLAddXMLAttributeAndValue(psOption, "name", "FID");
643 15 : CPLAddXMLAttributeAndValue(psOption, "type", "string");
644 15 : CPLAddXMLAttributeAndValue(psOption, "description",
645 : "Name of the FID column to create");
646 : }
647 :
648 : {
649 15 : auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
650 15 : CPLAddXMLAttributeAndValue(psOption, "name", "POLYGON_ORIENTATION");
651 15 : CPLAddXMLAttributeAndValue(psOption, "type", "string-select");
652 15 : CPLAddXMLAttributeAndValue(
653 : psOption, "description",
654 : "Which ring orientation to use for polygons");
655 15 : CPLAddXMLAttributeAndValue(psOption, "default", "COUNTERCLOCKWISE");
656 15 : CPLCreateXMLElementAndValue(psOption, "Value", "COUNTERCLOCKWISE");
657 15 : CPLCreateXMLElementAndValue(psOption, "Value", "UNMODIFIED");
658 : }
659 :
660 : {
661 15 : auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
662 15 : CPLAddXMLAttributeAndValue(psOption, "name", "EDGES");
663 15 : CPLAddXMLAttributeAndValue(psOption, "type", "string-select");
664 15 : CPLAddXMLAttributeAndValue(
665 : psOption, "description",
666 : "Name of the coordinate system for the edges");
667 15 : CPLAddXMLAttributeAndValue(psOption, "default", "PLANAR");
668 15 : CPLCreateXMLElementAndValue(psOption, "Value", "PLANAR");
669 15 : CPLCreateXMLElementAndValue(psOption, "Value", "SPHERICAL");
670 : }
671 :
672 : {
673 15 : auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
674 15 : CPLAddXMLAttributeAndValue(psOption, "name", "CREATOR");
675 15 : CPLAddXMLAttributeAndValue(psOption, "type", "string");
676 15 : CPLAddXMLAttributeAndValue(psOption, "description",
677 : "Name of creating application");
678 : }
679 :
680 : {
681 15 : auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
682 15 : CPLAddXMLAttributeAndValue(psOption, "name", "WRITE_COVERING_BBOX");
683 15 : CPLAddXMLAttributeAndValue(psOption, "type", "boolean");
684 15 : CPLAddXMLAttributeAndValue(psOption, "default", "YES");
685 15 : CPLAddXMLAttributeAndValue(psOption, "description",
686 : "Whether to write xmin/ymin/xmax/ymax "
687 : "columns with the bounding box of "
688 : "geometries");
689 : }
690 :
691 : {
692 15 : auto psOption = CPLCreateXMLNode(oTree.get(), CXT_Element, "Option");
693 15 : CPLAddXMLAttributeAndValue(psOption, "name", "SORT_BY_BBOX");
694 15 : CPLAddXMLAttributeAndValue(psOption, "type", "boolean");
695 15 : CPLAddXMLAttributeAndValue(psOption, "default", "NO");
696 15 : CPLAddXMLAttributeAndValue(psOption, "description",
697 : "Whether features should be sorted based on "
698 : "the bounding box of their geometries");
699 : }
700 :
701 15 : char *pszXML = CPLSerializeXMLTree(oTree.get());
702 15 : GDALDriver::SetMetadataItem(GDAL_DS_LAYER_CREATIONOPTIONLIST, pszXML);
703 15 : CPLFree(pszXML);
704 : }
705 :
706 : /************************************************************************/
707 : /* RegisterOGRParquet() */
708 : /************************************************************************/
709 :
710 30 : void RegisterOGRParquet()
711 : {
712 30 : if (GDALGetDriverByName(DRIVER_NAME) != nullptr)
713 0 : return;
714 :
715 60 : auto poDriver = std::make_unique<OGRParquetDriver>();
716 30 : OGRParquetDriverSetCommonMetadata(poDriver.get());
717 :
718 30 : poDriver->pfnOpen = OGRParquetDriverOpen;
719 30 : poDriver->pfnCreate = OGRParquetDriverCreate;
720 :
721 30 : poDriver->SetMetadataItem("ARROW_VERSION", ARROW_VERSION_STRING);
722 : #ifdef GDAL_USE_ARROWDATASET
723 30 : poDriver->SetMetadataItem("ARROW_DATASET", "YES");
724 : #endif
725 :
726 30 : GetGDALDriverManager()->RegisterDriver(poDriver.release());
727 :
728 : #if ARROW_VERSION_MAJOR >= 16
729 : // Mostly for tests
730 : const char *pszPath =
731 30 : CPLGetConfigOption("OGR_PARQUET_LOAD_FILE_SYSTEM_FACTORIES", nullptr);
732 30 : if (pszPath)
733 : {
734 0 : auto result = arrow::fs::LoadFileSystemFactories(pszPath);
735 0 : if (!result.ok())
736 : {
737 0 : CPLError(CE_Warning, CPLE_AppDefined,
738 : "arrow::fs::LoadFileSystemFactories() failed with %s",
739 0 : result.message().c_str());
740 : }
741 : }
742 : #endif
743 : }
|