Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: Feather Translator
4 : * Purpose: Implements OGRFeatherDriver.
5 : * Author: Even Rouault, <even.rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022, Planet Labs
9 : *
10 : * Permission is hereby granted, free of charge, to any person obtaining a
11 : * copy of this software and associated documentation files (the "Software"),
12 : * to deal in the Software without restriction, including without limitation
13 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 : * and/or sell copies of the Software, and to permit persons to whom the
15 : * Software is furnished to do so, subject to the following conditions:
16 : *
17 : * The above copyright notice and this permission notice shall be included
18 : * in all copies or substantial portions of the Software.
19 : *
20 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 : * DEALINGS IN THE SOFTWARE.
27 : ****************************************************************************/
28 :
29 : #ifndef OGR_FEATHER_H
30 : #define OGR_FEATHER_H
31 :
32 : #include "ogrsf_frmts.h"
33 :
34 : #include <map>
35 :
36 : #include "../arrow_common/ogr_arrow.h"
37 :
38 : #ifdef _MSC_VER
39 : #pragma warning(push)
40 : // warning 4244: 'initializing': conversion from 'int32_t' to 'int16_t',
41 : // possible loss of data
42 : #pragma warning(disable : 4244)
43 : // warning 4458: declaration of 'type_id' hides class member
44 : #pragma warning(disable : 4458)
45 : #endif
46 :
47 : #include "arrow/ipc/writer.h"
48 :
49 : #ifdef _MSC_VER
50 : #pragma warning(pop)
51 : #endif
52 :
// Metadata key literal "gdal:geo".
// NOTE(review): judging by the name, this is the key under which GDAL geo
// metadata is stored in the file footer - confirm against the reader/writer
// implementation, which is not part of this header.
53 : constexpr const char *GDAL_GEO_FOOTER_KEY = "gdal:geo";
// Upper-case driver name. This is the value returned by the GetDriverUCName()
// overrides of OGRFeatherLayer and OGRFeatherWriterLayer in this header.
54 : constexpr const char *ARROW_DRIVER_NAME_UC = "ARROW";
55 :
56 : /************************************************************************/
57 : /* OGRFeatherLayer */
58 : /************************************************************************/
59 :
// Forward declaration: OGRFeatherLayer stores an OGRFeatherDataset pointer
// (m_poDS), while the dataset class itself is only defined further down.
60 : class OGRFeatherDataset;
61 :
// Layer class of the Feather driver, derived from OGRArrowLayer.
// It can be constructed either from an arrow::ipc::RecordBatchFileReader or
// from an arrow::io::RandomAccessFile together with an
// arrow::ipc::RecordBatchStreamReader (see the two public constructors).
// Apart from the inline GetDriverUCName(), only declarations live here.
62 : class OGRFeatherLayer final : public OGRArrowLayer
63 :
64 : {
// Copy construction and copy assignment are disabled.
65 : OGRFeatherLayer(const OGRFeatherLayer &) = delete;
66 : OGRFeatherLayer &operator=(const OGRFeatherLayer &) = delete;
67 :
// Dataset pointer, null by default.
// NOTE(review): presumably set from the poDS constructor argument and
// returned by GetDataset() - confirm in the implementation file.
68 : OGRFeatherDataset *m_poDS = nullptr;
69 :
70 : // Variable only for seekable file format
71 : std::shared_ptr<arrow::ipc::RecordBatchFileReader>
72 : m_poRecordBatchFileReader{};
73 :
74 : // Variables only for streamable IPC format
75 : std::shared_ptr<arrow::io::RandomAccessFile> m_poFile{};
76 : bool m_bSeekable = true;
77 : arrow::ipc::IpcReadOptions m_oOptions{};
78 : std::shared_ptr<arrow::ipc::RecordBatchStreamReader>
79 : m_poRecordBatchReader{};
80 : bool m_bResetRecordBatchReaderAsked = false;
81 : bool m_bSingleBatch = false;
// NOTE(review): going by their names and by TryToCacheFirstTwoBatches(),
// these presumably hold the first two record batches of the stream - confirm.
82 : std::shared_ptr<arrow::RecordBatch> m_poBatchIdx0{};
83 : std::shared_ptr<arrow::RecordBatch> m_poBatchIdx1{};
84 :
// String list of Feather metadata.
// NOTE(review): presumably what GetMetadata() / GetMetadataItem() serve -
// confirm in the implementation file.
85 : CPLStringList m_aosFeatherMetadata{};
86 :
// Returns the upper-case driver name constant ARROW_DRIVER_NAME_UC.
87 194 : virtual std::string GetDriverUCName() const override
88 : {
89 194 : return ARROW_DRIVER_NAME_UC;
90 : }
91 :
// Private helpers and base-class overrides. Their definitions are not in
// this header, so their exact behavior has to be read from the .cpp file.
92 : bool ResetRecordBatchReader();
93 :
94 : void EstablishFeatureDefn();
95 : void LoadGeoMetadata(const arrow::KeyValueMetadata *kv_metadata,
96 : const std::string &key);
97 : OGRwkbGeometryType ComputeGeometryColumnType(int iGeomCol, int iCol) const;
98 : bool ReadNextBatch() override;
99 :
100 : void InvalidateCachedBatches() override;
101 :
// Not marked override, unlike the neighbouring declarations.
102 : OGRFeature *GetNextRawFeature();
103 :
104 : virtual bool CanRunNonForcedGetExtent() override;
105 :
106 : bool
107 : CanPostFilterArrowArray(const struct ArrowSchema *schema) const override;
108 :
// NOTE(review): presumably the per-format back ends of ReadNextBatch()
// (seekable file vs. stream) - confirm in the implementation file.
109 : bool ReadNextBatchFile();
110 : bool ReadNextBatchStream();
111 : void TryToCacheFirstTwoBatches();
112 :
113 : public:
// Constructor taking a record batch file reader. The shared_ptr is passed
// by non-const reference.
114 : OGRFeatherLayer(OGRFeatherDataset *poDS, const char *pszLayerName,
115 : std::shared_ptr<arrow::ipc::RecordBatchFileReader>
116 : &poRecordBatchFileReader);
// Constructor taking the underlying file, a seekable flag, the IPC read
// options and a record batch stream reader (also by non-const reference).
117 : OGRFeatherLayer(OGRFeatherDataset *poDS, const char *pszLayerName,
118 : std::shared_ptr<arrow::io::RandomAccessFile> poFile,
119 : bool bSeekable, const arrow::ipc::IpcReadOptions &oOptions,
120 : std::shared_ptr<arrow::ipc::RecordBatchStreamReader>
121 : &poRecordBatchStreamReader);
122 :
// Public overrides of base-class virtuals.
123 : void ResetReading() override;
124 : int TestCapability(const char *pszCap) override;
125 : GIntBig GetFeatureCount(int bForce) override;
// pszDomain defaults to the empty string in both metadata accessors.
126 : const char *GetMetadataItem(const char *pszName,
127 : const char *pszDomain = "") override;
128 : char **GetMetadata(const char *pszDomain = "") override;
129 :
130 : GDALDataset *GetDataset() override;
131 :
// Returns an owning pointer to a field domain built for the given domain
// name and field index (definition not in this header).
132 : std::unique_ptr<OGRFieldDomain> BuildDomain(const std::string &osDomainName,
133 : int iFieldIndex) const override;
134 : };
135 :
136 : /************************************************************************/
137 : /* OGRFeatherDataset */
138 : /************************************************************************/
139 :
// Dataset class derived from OGRArrowDataset. This is the type that
// OGRFeatherLayer keeps a pointer to (its m_poDS member).
140 : class OGRFeatherDataset final : public OGRArrowDataset
141 : {
142 : public:
// Explicit constructor taking a shared Arrow memory pool by const reference.
143 : explicit OGRFeatherDataset(
144 : const std::shared_ptr<arrow::MemoryPool> &poMemoryPool);
145 :
// Capability query override; the definition is not in this header.
146 : int TestCapability(const char *) override;
147 : };
148 :
149 : /************************************************************************/
150 : /* OGRFeatherWriterLayer */
151 : /************************************************************************/
152 :
// Writer layer class derived from OGRArrowWriterLayer. It holds an
// arrow::ipc::RecordBatchWriter and overrides the base-class writer hooks
// (CreateWriter, CloseFileWriter, CreateSchema, FlushGroup, ...).
153 : class OGRFeatherWriterLayer final : public OGRArrowWriterLayer
154 :
155 : {
// Copy construction and copy assignment are disabled.
156 : OGRFeatherWriterLayer(const OGRFeatherWriterLayer &) = delete;
157 : OGRFeatherWriterLayer &operator=(const OGRFeatherWriterLayer &) = delete;
158 :
// Dataset pointer returned by GetDataset(); null by default.
159 : GDALDataset *m_poDS = nullptr;
// NOTE(review): presumably selects the streamable IPC format instead of the
// file format - confirm in the implementation file.
160 : bool m_bStreamFormat = false;
// Record batch writer; IsFileWriterCreated() reports whether it is non-null.
161 : std::shared_ptr<arrow::ipc::RecordBatchWriter> m_poFileWriter{};
// NOTE(review): presumably key/value metadata written to the file footer
// (cf. GDAL_GEO_FOOTER_KEY) - confirm in the implementation file.
162 : std::shared_ptr<arrow::KeyValueMetadata> m_poFooterKeyValueMetadata{};
163 :
// True once m_poFileWriter holds a writer (i.e. is not null).
164 604 : virtual bool IsFileWriterCreated() const override
165 : {
166 604 : return m_poFileWriter != nullptr;
167 : }
168 :
// Base-class hooks overridden by this class; definitions are not in this
// header.
169 : virtual void CreateWriter() override;
170 : virtual bool CloseFileWriter() override;
171 :
172 : virtual void CreateSchema() override;
173 : virtual void PerformStepsBeforeFinalFlushGroup() override;
174 :
175 : virtual bool FlushGroup() override;
176 :
// Returns the upper-case driver name constant ARROW_DRIVER_NAME_UC.
177 222 : virtual std::string GetDriverUCName() const override
178 : {
179 222 : return ARROW_DRIVER_NAME_UC;
180 : }
181 :
182 : virtual bool
183 : IsSupportedGeometryType(OGRwkbGeometryType eGType) const override;
184 :
// Always returns true for this writer layer.
185 0 : virtual bool IsSRSRequired() const override
186 : {
187 0 : return true;
188 : }
189 :
190 : public:
// Constructor taking the dataset pointer, a raw Arrow memory pool pointer,
// the output stream (shared_ptr by const reference) and the layer name.
191 : OGRFeatherWriterLayer(
192 : GDALDataset *poDS, arrow::MemoryPool *poMemoryPool,
193 : const std::shared_ptr<arrow::io::OutputStream> &poOutputStream,
194 : const char *pszLayerName);
195 :
196 : ~OGRFeatherWriterLayer() override;
197 :
// Configures the layer from the file name, creation options, spatial
// reference and geometry type; returns a bool status.
// NOTE(review): presumably false means failure - confirm in the .cpp.
198 : bool SetOptions(const std::string &osFilename, CSLConstList papszOptions,
199 : const OGRSpatialReference *poSpatialRef,
200 : OGRwkbGeometryType eGType);
201 :
// Override taking an Arrow C data interface schema/array pair; papszOptions
// defaults to nullptr.
202 : bool WriteArrowBatch(const struct ArrowSchema *schema,
203 : struct ArrowArray *array,
204 : CSLConstList papszOptions = nullptr) override;
205 :
// Returns the stored dataset pointer m_poDS.
206 10 : GDALDataset *GetDataset() override
207 : {
208 10 : return m_poDS;
209 : }
210 : };
211 :
212 : /************************************************************************/
213 : /* OGRFeatherWriterDataset */
214 : /************************************************************************/
215 :
// Writer dataset class derived from GDALPamDataset. It owns a memory pool
// and a single OGRFeatherWriterLayer through unique_ptr members, and shares
// the output stream through a shared_ptr.
216 : class OGRFeatherWriterDataset final : public GDALPamDataset
217 : {
// File name; const, so it can only be set at construction.
218 : const std::string m_osFilename{};
// Arrow memory pool owned by the dataset; exposed through GetMemoryPool().
219 : std::unique_ptr<arrow::MemoryPool> m_poMemoryPool{};
// The writer layer owned by the dataset; empty by default.
// NOTE(review): presumably created by ICreateLayer() - confirm in the .cpp.
220 : std::unique_ptr<OGRFeatherWriterLayer> m_poLayer{};
221 : std::shared_ptr<arrow::io::OutputStream> m_poOutputStream{};
222 :
223 : public:
// Constructor taking the file name and the output stream (shared_ptr by
// const reference).
224 : explicit OGRFeatherWriterDataset(
225 : const char *pszFilename,
226 : const std::shared_ptr<arrow::io::OutputStream> &poOutputStream);
227 :
// Returns a raw pointer to the memory pool; ownership stays with
// m_poMemoryPool, since only get() is called on it.
228 : arrow::MemoryPool *GetMemoryPool() const
229 : {
230 : return m_poMemoryPool.get();
231 : }
232 :
// Overrides of base-class virtuals; definitions are not in this header.
233 : int GetLayerCount() override;
234 : OGRLayer *GetLayer(int idx) override;
235 : int TestCapability(const char *pszCap) override;
// Field domain API overrides.
236 : std::vector<std::string> GetFieldDomainNames(
237 : CSLConstList /*papszOptions*/ = nullptr) const override;
238 : const OGRFieldDomain *
239 : GetFieldDomain(const std::string &name) const override;
// Takes ownership of the domain (rvalue unique_ptr); failureReason is an
// output string parameter.
240 : bool AddFieldDomain(std::unique_ptr<OGRFieldDomain> &&domain,
241 : std::string &failureReason) override;
242 :
243 : protected:
// Layer creation hook override taking the layer name, the geometry field
// definition and the creation options.
244 : OGRLayer *ICreateLayer(const char *pszName,
245 : const OGRGeomFieldDefn *poGeomFieldDefn,
246 : CSLConstList papszOptions) override;
247 : };
248 :
249 : #endif // OGR_FEATHER_H
|