Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: Parquet Translator
4 : * Purpose: Implements OGRParquetDriver.
5 : * Author: Even Rouault, <even.rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022, Planet Labs
9 : *
10 : * Permission is hereby granted, free of charge, to any person obtaining a
11 : * copy of this software and associated documentation files (the "Software"),
12 : * to deal in the Software without restriction, including without limitation
13 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 : * and/or sell copies of the Software, and to permit persons to whom the
15 : * Software is furnished to do so, subject to the following conditions:
16 : *
17 : * The above copyright notice and this permission notice shall be included
18 : * in all copies or substantial portions of the Software.
19 : *
20 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 : * DEALINGS IN THE SOFTWARE.
27 : ****************************************************************************/
28 :
29 : #include "ogrsf_frmts.h"
30 : #include "gdal_priv.h"
31 :
32 : #include "ogrparquetdrivercore.h"
33 :
34 : /************************************************************************/
35 : /* Identify() */
36 : /************************************************************************/
37 :
/** Compile-time length of a character array, not counting its last element
 * (the terminating NUL when the argument is a string literal).
 *
 * @return the array extent minus one, as an int.
 */
template <size_t nArrayExtent>
constexpr int constexpr_length(const char (&)[nArrayExtent])
{
    return static_cast<int>(nArrayExtent - 1);
}
42 :
43 42834 : int OGRParquetDriverIdentify(GDALOpenInfo *poOpenInfo)
44 : {
45 : #if defined(GDAL_USE_ARROWDATASET) || defined(PLUGIN_FILENAME)
46 42834 : if (poOpenInfo->bIsDirectory)
47 : {
48 : // Might be a ParquetDataset
49 852 : return -1;
50 : }
51 : #endif
52 41982 : if (STARTS_WITH(poOpenInfo->pszFilename, "PARQUET:"))
53 9 : return TRUE;
54 :
55 : // See https://github.com/apache/parquet-format#file-format
56 41973 : bool bRet = false;
57 41973 : constexpr const char SIGNATURE[] = "PAR1";
58 41973 : constexpr int SIGNATURE_SIZE = constexpr_length(SIGNATURE);
59 : static_assert(SIGNATURE_SIZE == 4, "SIGNATURE_SIZE == 4");
60 41973 : constexpr int METADATASIZE_SIZE = 4;
61 41973 : if (poOpenInfo->fpL != nullptr &&
62 2725 : poOpenInfo->nHeaderBytes >=
63 2625 : SIGNATURE_SIZE + METADATASIZE_SIZE + SIGNATURE_SIZE &&
64 2625 : memcmp(poOpenInfo->pabyHeader, SIGNATURE, SIGNATURE_SIZE) == 0)
65 : {
66 1323 : VSIFSeekL(poOpenInfo->fpL, 0, SEEK_END);
67 1323 : const auto nFileSize = VSIFTellL(poOpenInfo->fpL);
68 1323 : VSIFSeekL(poOpenInfo->fpL,
69 : nFileSize - (METADATASIZE_SIZE + SIGNATURE_SIZE), SEEK_SET);
70 1323 : uint32_t nMetadataSize = 0;
71 : static_assert(sizeof(nMetadataSize) == METADATASIZE_SIZE,
72 : "sizeof(nMetadataSize) == METADATASIZE_SIZE");
73 1323 : VSIFReadL(&nMetadataSize, 1, sizeof(nMetadataSize), poOpenInfo->fpL);
74 1323 : CPL_LSBPTR32(&nMetadataSize);
75 1323 : unsigned char abyTrailingBytes[SIGNATURE_SIZE] = {0};
76 1323 : VSIFReadL(&abyTrailingBytes[0], 1, SIGNATURE_SIZE, poOpenInfo->fpL);
77 2646 : bRet = memcmp(abyTrailingBytes, SIGNATURE, SIGNATURE_SIZE) == 0 &&
78 1323 : nMetadataSize < nFileSize;
79 1323 : VSIFSeekL(poOpenInfo->fpL, 0, SEEK_SET);
80 : }
81 41973 : return bRet;
82 : }
83 :
84 : /************************************************************************/
85 : /* OGRParquetDriverSetCommonMetadata() */
86 : /************************************************************************/
87 :
88 1244 : void OGRParquetDriverSetCommonMetadata(GDALDriver *poDriver)
89 : {
90 1244 : poDriver->SetDescription(DRIVER_NAME);
91 1244 : poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES");
92 1244 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_LAYER, "YES");
93 1244 : poDriver->SetMetadataItem(GDAL_DMD_LONGNAME, "(Geo)Parquet");
94 1244 : poDriver->SetMetadataItem(GDAL_DMD_EXTENSION, "parquet");
95 1244 : poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC,
96 1244 : "drivers/vector/parquet.html");
97 1244 : poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES");
98 1244 : poDriver->SetMetadataItem(GDAL_DCAP_MEASURED_GEOMETRIES, "YES");
99 1244 : poDriver->SetMetadataItem(GDAL_DCAP_Z_GEOMETRIES, "YES");
100 :
101 1244 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_FIELD, "YES");
102 1244 : poDriver->SetMetadataItem(
103 : GDAL_DMD_CREATIONFIELDDATATYPES,
104 : "Integer Integer64 Real String Date Time DateTime "
105 1244 : "Binary IntegerList Integer64List RealList StringList");
106 1244 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATASUBTYPES,
107 1244 : "Boolean Int16 Float32 JSON UUID");
108 1244 : poDriver->SetMetadataItem(GDAL_DMD_CREATION_FIELD_DEFN_FLAGS,
109 : "WidthPrecision Nullable Comment "
110 1244 : "AlternativeName Domain");
111 1244 : poDriver->SetMetadataItem(GDAL_DMD_SUPPORTED_SQL_DIALECTS, "OGRSQL SQLITE");
112 :
113 1244 : poDriver->SetMetadataItem(
114 : GDAL_DMD_OPENOPTIONLIST,
115 : "<OpenOptionList>"
116 : " <Option name='GEOM_POSSIBLE_NAMES' type='string' description='Comma "
117 : "separated list of possible names for geometry column(s).' "
118 : "default='geometry,wkb_geometry,wkt_geometry'/>"
119 : " <Option name='CRS' type='string' "
120 : "description='Set/override CRS, typically defined as AUTH:CODE "
121 : "(e.g EPSG:4326), of geometry column(s)'/>"
122 1244 : "</OpenOptionList>");
123 :
124 1244 : poDriver->pfnIdentify = OGRParquetDriverIdentify;
125 1244 : poDriver->SetMetadataItem(GDAL_DCAP_OPEN, "YES");
126 1244 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE, "YES");
127 1244 : }
128 :
129 : /************************************************************************/
130 : /* DeclareDeferredOGRParquetPlugin() */
131 : /************************************************************************/
132 :
133 : #ifdef PLUGIN_FILENAME
134 1523 : void DeclareDeferredOGRParquetPlugin()
135 : {
136 1523 : if (GDALGetDriverByName(DRIVER_NAME) != nullptr)
137 : {
138 301 : return;
139 : }
140 1222 : auto poDriver = new GDALPluginDriverProxy(PLUGIN_FILENAME);
141 : #ifdef PLUGIN_INSTALLATION_MESSAGE
142 : poDriver->SetMetadataItem(GDAL_DMD_PLUGIN_INSTALLATION_MESSAGE,
143 : PLUGIN_INSTALLATION_MESSAGE);
144 : #endif
145 1222 : OGRParquetDriverSetCommonMetadata(poDriver);
146 1222 : GetGDALDriverManager()->DeclareDeferredPluginDriver(poDriver);
147 : }
148 : #endif
|