Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: Arrow generic code
4 : * Purpose: Arrow generic code
5 : * Author: Even Rouault, <even.rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022, Planet Labs
9 : *
10 : * Permission is hereby granted, free of charge, to any person obtaining a
11 : * copy of this software and associated documentation files (the "Software"),
12 : * to deal in the Software without restriction, including without limitation
13 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 : * and/or sell copies of the Software, and to permit persons to whom the
15 : * Software is furnished to do so, subject to the following conditions:
16 : *
17 : * The above copyright notice and this permission notice shall be included
18 : * in all copies or substantial portions of the Software.
19 : *
20 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 : * DEALINGS IN THE SOFTWARE.
27 : ****************************************************************************/
28 :
29 : #ifndef OGARROWWRITERLAYER_HPP_INCLUDED
30 : #define OGARROWWRITERLAYER_HPP_INCLUDED
31 :
32 : #include "ogr_arrow.h"
33 :
34 : #include "cpl_json.h"
35 : #include "cpl_time.h"
36 :
37 : #include "ogrlayerarrow.h"
38 : #include "ogr_wkb.h"
39 :
40 : #include <array>
41 : #include <cinttypes>
42 : #include <limits>
43 :
// Initial value of every m_anTZFlag entry, i.e. "no timezone flag recorded
// yet for this field".
static constexpr int TZFLAG_UNINITIALIZED = -1;

// Evaluates the Arrow status expression once; when it is not ok(), emits a
// CPLError() built from ARROW_STRINGIFY(status) and returns ret_value from
// the *calling* function.
#define OGR_ARROW_RETURN_NOT_OK(status, ret_value)                             \
    do                                                                         \
    {                                                                          \
        if (!(status).ok())                                                    \
        {                                                                      \
            CPLError(CE_Failure, CPLE_AppDefined, "%s failed",                 \
                     ARROW_STRINGIFY(status));                                 \
            return (ret_value);                                                \
        }                                                                      \
    } while (false)

// Same as OGR_ARROW_RETURN_NOT_OK(), for functions returning bool.
#define OGR_ARROW_RETURN_FALSE_NOT_OK(status)                                  \
    OGR_ARROW_RETURN_NOT_OK(status, false)

// Same as OGR_ARROW_RETURN_NOT_OK(), for functions returning OGRErr.
#define OGR_ARROW_RETURN_OGRERR_NOT_OK(status)                                 \
    OGR_ARROW_RETURN_NOT_OK(status, OGRERR_FAILURE)

// Returns OGRERR_FAILURE from the calling function when ret_value is not
// OGRERR_NONE. Note that the specific error code is not forwarded.
#define OGR_ARROW_PROPAGATE_OGRERR(ret_value)                                  \
    do                                                                         \
    {                                                                          \
        if ((ret_value) != OGRERR_NONE)                                        \
            return OGRERR_FAILURE;                                             \
    } while (false)
69 :
70 : /************************************************************************/
71 : /* OGRArrowWriterLayer() */
72 : /************************************************************************/
73 :
74 338 : inline OGRArrowWriterLayer::OGRArrowWriterLayer(
75 : arrow::MemoryPool *poMemoryPool,
76 : const std::shared_ptr<arrow::io::OutputStream> &poOutputStream,
77 338 : const char *pszLayerName)
78 338 : : m_poMemoryPool(poMemoryPool), m_poOutputStream(poOutputStream)
79 : {
80 338 : m_poFeatureDefn = new OGRFeatureDefn(pszLayerName);
81 338 : m_poFeatureDefn->SetGeomType(wkbNone);
82 338 : m_poFeatureDefn->Reference();
83 338 : SetDescription(pszLayerName);
84 338 : }
85 :
86 : /************************************************************************/
87 : /* ~OGRArrowWriterLayer() */
88 : /************************************************************************/
89 :
90 338 : inline OGRArrowWriterLayer::~OGRArrowWriterLayer()
91 : {
92 338 : CPLDebug("ARROW", "Memory pool (writer layer): bytes_allocated = %" PRId64,
93 338 : m_poMemoryPool->bytes_allocated());
94 338 : CPLDebug("ARROW", "Memory pool (writer layer): max_memory = %" PRId64,
95 338 : m_poMemoryPool->max_memory());
96 :
97 338 : m_poFeatureDefn->Release();
98 338 : }
99 :
100 : /************************************************************************/
101 : /* FinalizeWriting() */
102 : /************************************************************************/
103 :
104 326 : inline bool OGRArrowWriterLayer::FinalizeWriting()
105 : {
106 326 : bool ret = true;
107 :
108 326 : if (!IsFileWriterCreated())
109 : {
110 199 : CreateWriter();
111 : }
112 326 : if (IsFileWriterCreated())
113 : {
114 326 : PerformStepsBeforeFinalFlushGroup();
115 :
116 326 : if (!m_apoBuilders.empty() && m_apoFieldsFromArrowSchema.empty())
117 167 : ret = FlushGroup();
118 :
119 326 : if (!CloseFileWriter())
120 0 : ret = false;
121 : }
122 :
123 326 : return ret;
124 : }
125 :
126 : /************************************************************************/
127 : /* CreateSchemaCommon() */
128 : /************************************************************************/
129 :
130 326 : inline void OGRArrowWriterLayer::CreateSchemaCommon()
131 : {
132 326 : CPLAssert(static_cast<int>(m_aeGeomEncoding.size()) ==
133 : m_poFeatureDefn->GetGeomFieldCount());
134 :
135 652 : std::vector<std::shared_ptr<arrow::Field>> fields;
136 326 : bool bNeedGDALSchema = false;
137 :
138 326 : m_anTZFlag.resize(m_poFeatureDefn->GetFieldCount(), TZFLAG_UNINITIALIZED);
139 :
140 326 : if (!m_osFIDColumn.empty())
141 : {
142 18 : bNeedGDALSchema = true;
143 18 : fields.emplace_back(arrow::field(m_osFIDColumn, arrow::int64(), false));
144 : }
145 :
146 326 : if (!m_apoFieldsFromArrowSchema.empty())
147 : {
148 117 : fields.insert(fields.end(), m_apoFieldsFromArrowSchema.begin(),
149 234 : m_apoFieldsFromArrowSchema.end());
150 : }
151 :
152 896 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i)
153 : {
154 570 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
155 570 : std::shared_ptr<arrow::DataType> dt;
156 570 : const auto eSubDT = poFieldDefn->GetSubType();
157 570 : const auto &osDomainName = poFieldDefn->GetDomainName();
158 570 : const OGRFieldDomain *poFieldDomain = nullptr;
159 570 : const int nWidth = poFieldDefn->GetWidth();
160 570 : if (!osDomainName.empty())
161 : {
162 4 : const auto oIter = m_oMapFieldDomains.find(osDomainName);
163 4 : if (oIter == m_oMapFieldDomains.end())
164 : {
165 0 : CPLError(CE_Warning, CPLE_AppDefined,
166 : "Field %s references domain %s, but the later one "
167 : "has not been created",
168 : poFieldDefn->GetNameRef(), osDomainName.c_str());
169 : }
170 : else
171 : {
172 4 : poFieldDomain = oIter->second.get();
173 : }
174 : }
175 570 : switch (poFieldDefn->GetType())
176 : {
177 65 : case OFTInteger:
178 65 : if (eSubDT == OFSTBoolean)
179 4 : dt = arrow::boolean();
180 61 : else if (eSubDT == OFSTInt16)
181 4 : dt = arrow::int16();
182 : else
183 57 : dt = arrow::int32();
184 65 : if (poFieldDomain != nullptr)
185 : {
186 4 : dt = arrow::dictionary(dt, arrow::utf8());
187 : }
188 65 : break;
189 :
190 26 : case OFTInteger64:
191 26 : dt = arrow::int64();
192 26 : if (poFieldDomain != nullptr)
193 : {
194 0 : dt = arrow::dictionary(dt, arrow::utf8());
195 : }
196 26 : break;
197 :
198 52 : case OFTReal:
199 : {
200 52 : const int nPrecision = poFieldDefn->GetPrecision();
201 52 : if (nWidth != 0 && nPrecision != 0)
202 8 : dt = arrow::decimal(nWidth, nPrecision);
203 44 : else if (eSubDT == OFSTFloat32)
204 7 : dt = arrow::float32();
205 : else
206 37 : dt = arrow::float64();
207 52 : break;
208 : }
209 :
210 212 : case OFTString:
211 : case OFTWideString:
212 212 : if (eSubDT != OFSTNone || nWidth > 0)
213 82 : bNeedGDALSchema = true;
214 212 : dt = arrow::utf8();
215 212 : break;
216 :
217 14 : case OFTBinary:
218 14 : if (nWidth != 0)
219 4 : dt = arrow::fixed_size_binary(nWidth);
220 : else
221 10 : dt = arrow::binary();
222 14 : break;
223 :
224 48 : case OFTIntegerList:
225 48 : if (eSubDT == OFSTBoolean)
226 8 : dt = arrow::list(arrow::boolean());
227 40 : else if (eSubDT == OFSTInt16)
228 0 : dt = arrow::list(arrow::int16());
229 : else
230 40 : dt = arrow::list(arrow::int32());
231 48 : break;
232 :
233 20 : case OFTInteger64List:
234 20 : dt = arrow::list(arrow::int64());
235 20 : break;
236 :
237 35 : case OFTRealList:
238 35 : if (eSubDT == OFSTFloat32)
239 11 : dt = arrow::list(arrow::float32());
240 : else
241 24 : dt = arrow::list(arrow::float64());
242 35 : break;
243 :
244 12 : case OFTStringList:
245 : case OFTWideStringList:
246 12 : dt = arrow::list(arrow::utf8());
247 12 : break;
248 :
249 31 : case OFTDate:
250 31 : dt = arrow::date32();
251 31 : break;
252 :
253 8 : case OFTTime:
254 8 : dt = arrow::time32(arrow::TimeUnit::MILLI);
255 8 : break;
256 :
257 47 : case OFTDateTime:
258 : {
259 47 : const int nTZFlag = poFieldDefn->GetTZFlag();
260 47 : if (nTZFlag >= OGR_TZFLAG_MIXED_TZ)
261 : {
262 12 : m_anTZFlag[i] = nTZFlag;
263 : }
264 47 : dt = arrow::timestamp(arrow::TimeUnit::MILLI);
265 47 : break;
266 : }
267 : }
268 1710 : fields.emplace_back(arrow::field(poFieldDefn->GetNameRef(),
269 570 : std::move(dt),
270 1140 : poFieldDefn->IsNullable()));
271 570 : if (poFieldDefn->GetAlternativeNameRef()[0])
272 2 : bNeedGDALSchema = true;
273 570 : if (!poFieldDefn->GetComment().empty())
274 2 : bNeedGDALSchema = true;
275 : }
276 :
277 652 : for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i)
278 : {
279 326 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
280 326 : const auto eGType = poGeomFieldDefn->GetType();
281 : const int nDim =
282 326 : 2 + (OGR_GT_HasZ(eGType) ? 1 : 0) + (OGR_GT_HasM(eGType) ? 1 : 0);
283 :
284 326 : const bool pointFieldNullable = GetDriverUCName() == "PARQUET";
285 :
286 : // Fixed Size List GeoArrow encoding
287 326 : std::shared_ptr<arrow::Field> pointField;
288 326 : if (nDim == 2)
289 : pointField =
290 203 : arrow::field("xy", arrow::float64(), pointFieldNullable);
291 123 : else if (nDim == 3 && OGR_GT_HasZ(eGType))
292 : pointField =
293 71 : arrow::field("xyz", arrow::float64(), pointFieldNullable);
294 52 : else if (nDim == 3 && OGR_GT_HasM(eGType))
295 : pointField =
296 26 : arrow::field("xym", arrow::float64(), pointFieldNullable);
297 : else
298 : pointField =
299 26 : arrow::field("xyzm", arrow::float64(), pointFieldNullable);
300 :
301 : // Struct GeoArrow encoding
302 978 : auto xField(arrow::field("x", arrow::float64(), false));
303 978 : auto yField(arrow::field("y", arrow::float64(), false));
304 : std::vector<std::shared_ptr<arrow::Field>> pointFields{
305 : arrow::field("x", arrow::float64(), false),
306 1956 : arrow::field("y", arrow::float64(), false)};
307 326 : if (OGR_GT_HasZ(eGType))
308 : pointFields.emplace_back(
309 97 : arrow::field("z", arrow::float64(), false));
310 326 : if (OGR_GT_HasM(eGType))
311 : pointFields.emplace_back(
312 52 : arrow::field("m", arrow::float64(), false));
313 652 : auto pointStructType(arrow::struct_(std::move(pointFields)));
314 :
315 326 : std::shared_ptr<arrow::DataType> dt;
316 326 : switch (m_aeGeomEncoding[i])
317 : {
318 126 : case OGRArrowGeomEncoding::WKB:
319 126 : dt = arrow::binary();
320 126 : break;
321 :
322 53 : case OGRArrowGeomEncoding::WKT:
323 53 : dt = arrow::utf8();
324 53 : break;
325 :
326 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
327 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
328 0 : CPLAssert(false);
329 : break;
330 :
331 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
332 8 : dt = arrow::fixed_size_list(pointField, nDim);
333 8 : break;
334 :
335 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
336 8 : dt = arrow::list(arrow::fixed_size_list(pointField, nDim));
337 8 : break;
338 :
339 10 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
340 20 : dt = arrow::list(
341 30 : arrow::list(arrow::fixed_size_list(pointField, nDim)));
342 10 : break;
343 :
344 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
345 8 : dt = arrow::list(arrow::fixed_size_list(pointField, nDim));
346 8 : break;
347 :
348 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
349 16 : dt = arrow::list(
350 24 : arrow::list(arrow::fixed_size_list(pointField, nDim)));
351 8 : break;
352 :
353 10 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
354 20 : dt = arrow::list(arrow::list(
355 30 : arrow::list(arrow::fixed_size_list(pointField, nDim))));
356 10 : break;
357 :
358 22 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
359 22 : dt = pointStructType;
360 22 : break;
361 :
362 13 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
363 13 : dt = arrow::list(pointStructType);
364 13 : break;
365 :
366 17 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
367 17 : dt = arrow::list(arrow::list(pointStructType));
368 17 : break;
369 :
370 13 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
371 13 : dt = arrow::list(pointStructType);
372 13 : break;
373 :
374 13 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
375 13 : dt = arrow::list(arrow::list(pointStructType));
376 13 : break;
377 :
378 17 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
379 17 : dt = arrow::list(arrow::list(arrow::list(pointStructType)));
380 17 : break;
381 : }
382 :
383 : std::shared_ptr<arrow::Field> field(
384 326 : arrow::field(poGeomFieldDefn->GetNameRef(), std::move(dt),
385 978 : poGeomFieldDefn->IsNullable()));
386 326 : if (m_bWriteFieldArrowExtensionName)
387 : {
388 125 : auto kvMetadata = field->metadata()
389 125 : ? field->metadata()->Copy()
390 125 : : std::make_shared<arrow::KeyValueMetadata>();
391 250 : kvMetadata->Append(
392 : "ARROW:extension:name",
393 125 : GetGeomEncodingAsString(m_aeGeomEncoding[i], false));
394 125 : field = field->WithMetadata(kvMetadata);
395 : }
396 :
397 326 : m_apoBaseStructGeomType.emplace_back(std::move(pointStructType));
398 :
399 326 : fields.emplace_back(std::move(field));
400 : }
401 :
402 326 : if (m_bWriteBBoxStruct)
403 : {
404 315 : for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i)
405 : {
406 160 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
407 480 : auto bbox_field_xmin(arrow::field("xmin", arrow::float32(), false));
408 480 : auto bbox_field_ymin(arrow::field("ymin", arrow::float32(), false));
409 480 : auto bbox_field_xmax(arrow::field("xmax", arrow::float32(), false));
410 480 : auto bbox_field_ymax(arrow::field("ymax", arrow::float32(), false));
411 : auto bbox_field(arrow::field(
412 : CPLGetConfigOption("OGR_PARQUET_COVERING_BBOX_NAME",
413 320 : std::string(poGeomFieldDefn->GetNameRef())
414 160 : .append("_bbox")
415 : .c_str()),
416 960 : arrow::struct_(
417 160 : {std::move(bbox_field_xmin), std::move(bbox_field_ymin),
418 960 : std::move(bbox_field_xmax), std::move(bbox_field_ymax)}),
419 960 : poGeomFieldDefn->IsNullable()));
420 160 : fields.emplace_back(bbox_field);
421 160 : m_apoFieldsBBOX.emplace_back(bbox_field);
422 : }
423 : }
424 :
425 326 : m_aoEnvelopes.resize(m_poFeatureDefn->GetGeomFieldCount());
426 326 : m_oSetWrittenGeometryTypes.resize(m_poFeatureDefn->GetGeomFieldCount());
427 :
428 326 : m_poSchema = arrow::schema(std::move(fields));
429 326 : CPLAssert(m_poSchema);
430 348 : if (bNeedGDALSchema &&
431 22 : CPLTestBool(CPLGetConfigOption(
432 348 : ("OGR_" + GetDriverUCName() + "_WRITE_GDAL_SCHEMA").c_str(),
433 : "YES")))
434 : {
435 44 : CPLJSONObject oRoot;
436 44 : CPLJSONObject oColumns;
437 :
438 22 : if (!m_osFIDColumn.empty())
439 18 : oRoot.Add("fid", m_osFIDColumn);
440 :
441 22 : oRoot.Add("columns", oColumns);
442 376 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i)
443 : {
444 354 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
445 708 : CPLJSONObject oColumn;
446 354 : oColumns.Add(poFieldDefn->GetNameRef(), oColumn);
447 354 : oColumn.Add("type", OGR_GetFieldTypeName(poFieldDefn->GetType()));
448 354 : const auto eSubDT = poFieldDefn->GetSubType();
449 354 : if (eSubDT != OFSTNone)
450 116 : oColumn.Add("subtype", OGR_GetFieldSubTypeName(eSubDT));
451 354 : const int nWidth = poFieldDefn->GetWidth();
452 354 : if (nWidth > 0)
453 12 : oColumn.Add("width", nWidth);
454 354 : const int nPrecision = poFieldDefn->GetPrecision();
455 354 : if (nPrecision > 0)
456 8 : oColumn.Add("precision", nPrecision);
457 354 : if (poFieldDefn->GetAlternativeNameRef()[0])
458 2 : oColumn.Add("alternative_name",
459 : poFieldDefn->GetAlternativeNameRef());
460 354 : if (!poFieldDefn->GetComment().empty())
461 2 : oColumn.Add("comment", poFieldDefn->GetComment());
462 : }
463 :
464 22 : auto kvMetadata = m_poSchema->metadata()
465 0 : ? m_poSchema->metadata()->Copy()
466 44 : : std::make_shared<arrow::KeyValueMetadata>();
467 44 : kvMetadata->Append("gdal:schema",
468 44 : oRoot.Format(CPLJSONObject::PrettyFormat::Plain));
469 22 : m_poSchema = m_poSchema->WithMetadata(kvMetadata);
470 22 : CPLAssert(m_poSchema);
471 : }
472 326 : }
473 :
474 : /************************************************************************/
475 : /* FinalizeSchema() */
476 : /************************************************************************/
477 :
478 285 : inline void OGRArrowWriterLayer::FinalizeSchema()
479 : {
480 : // Final tuning of schema taking into actual timezone values
481 : // from features
482 285 : int nArrowIdxFirstField = !m_osFIDColumn.empty() ? 1 : 0;
483 853 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i)
484 : {
485 568 : if (m_anTZFlag[i] >= OGR_TZFLAG_MIXED_TZ)
486 : {
487 12 : const int nOffset = m_anTZFlag[i] == OGR_TZFLAG_UTC
488 12 : ? 0
489 8 : : (m_anTZFlag[i] - OGR_TZFLAG_UTC) * 15;
490 12 : int nHours = static_cast<int>(nOffset / 60); // Round towards zero.
491 12 : const int nMinutes = std::abs(nOffset - nHours * 60);
492 :
493 : const std::string osTZ =
494 : CPLSPrintf("%c%02d:%02d", nOffset >= 0 ? '+' : '-',
495 24 : std::abs(nHours), nMinutes);
496 24 : auto dt = arrow::timestamp(arrow::TimeUnit::MILLI, osTZ);
497 12 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
498 12 : auto field = arrow::field(poFieldDefn->GetNameRef(), std::move(dt),
499 36 : poFieldDefn->IsNullable());
500 24 : auto result = m_poSchema->SetField(nArrowIdxFirstField + i, field);
501 12 : if (!result.ok())
502 : {
503 0 : CPLError(CE_Warning, CPLE_AppDefined,
504 : "Schema::SetField() failed with %s",
505 0 : result.status().message().c_str());
506 : }
507 : else
508 : {
509 12 : m_poSchema = *result;
510 : }
511 : }
512 : }
513 285 : }
514 :
515 : /************************************************************************/
516 : /* AddFieldDomain() */
517 : /************************************************************************/
518 :
519 : inline bool
520 9 : OGRArrowWriterLayer::AddFieldDomain(std::unique_ptr<OGRFieldDomain> &&domain,
521 : std::string &failureReason)
522 : {
523 9 : if (domain->GetDomainType() != OFDT_CODED)
524 : {
525 0 : failureReason = "Only coded field domains are supported by Arrow";
526 0 : return false;
527 : }
528 :
529 : const OGRCodedFieldDomain *poDomain =
530 9 : static_cast<const OGRCodedFieldDomain *>(domain.get());
531 9 : const OGRCodedValue *psIter = poDomain->GetEnumeration();
532 :
533 : auto poStringBuilder =
534 18 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool);
535 :
536 9 : int nLastCode = -1;
537 36 : for (; psIter->pszCode; ++psIter)
538 : {
539 27 : if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
540 : {
541 0 : failureReason = "Non integer code in domain ";
542 0 : failureReason += domain->GetName();
543 0 : return false;
544 : }
545 27 : int nCode = atoi(psIter->pszCode);
546 27 : if (nCode <= nLastCode || nCode - nLastCode > 100)
547 : {
548 0 : failureReason = "Too sparse codes in domain ";
549 0 : failureReason += domain->GetName();
550 0 : return false;
551 : }
552 27 : for (int i = nLastCode + 1; i < nCode; ++i)
553 : {
554 0 : OGR_ARROW_RETURN_FALSE_NOT_OK(poStringBuilder->AppendNull());
555 : }
556 27 : if (psIter->pszValue)
557 27 : OGR_ARROW_RETURN_FALSE_NOT_OK(
558 : poStringBuilder->Append(psIter->pszValue));
559 : else
560 0 : OGR_ARROW_RETURN_FALSE_NOT_OK(poStringBuilder->AppendNull());
561 27 : nLastCode = nCode;
562 : }
563 :
564 9 : std::shared_ptr<arrow::Array> stringArray;
565 18 : auto status = poStringBuilder->Finish(&stringArray);
566 9 : if (!status.ok())
567 : {
568 0 : CPLError(CE_Failure, CPLE_AppDefined,
569 : "StringArray::Finish() failed with %s",
570 0 : status.message().c_str());
571 0 : return false;
572 : }
573 :
574 9 : m_oMapFieldDomainToStringArray[domain->GetName()] = std::move(stringArray);
575 9 : m_oMapFieldDomains[domain->GetName()] = std::move(domain);
576 9 : return true;
577 : }
578 :
579 : /************************************************************************/
580 : /* GetFieldDomainNames() */
581 : /************************************************************************/
582 :
583 0 : inline std::vector<std::string> OGRArrowWriterLayer::GetFieldDomainNames() const
584 : {
585 0 : std::vector<std::string> names;
586 0 : names.reserve(m_oMapFieldDomains.size());
587 0 : for (const auto &it : m_oMapFieldDomains)
588 : {
589 0 : names.emplace_back(it.first);
590 : }
591 0 : return names;
592 : }
593 :
594 : /************************************************************************/
595 : /* GetFieldDomain() */
596 : /************************************************************************/
597 :
598 : inline const OGRFieldDomain *
599 13 : OGRArrowWriterLayer::GetFieldDomain(const std::string &name) const
600 : {
601 13 : const auto iter = m_oMapFieldDomains.find(name);
602 13 : if (iter == m_oMapFieldDomains.end())
603 9 : return nullptr;
604 4 : return iter->second.get();
605 : }
606 :
607 : /************************************************************************/
608 : /* CreateField() */
609 : /************************************************************************/
610 :
611 571 : inline OGRErr OGRArrowWriterLayer::CreateField(const OGRFieldDefn *poField,
612 : int /* bApproxOK */)
613 : {
614 571 : if (m_poSchema)
615 : {
616 1 : CPLError(CE_Failure, CPLE_NotSupported,
617 : "Cannot add field after a first feature has been written");
618 1 : return OGRERR_FAILURE;
619 : }
620 570 : if (!m_apoFieldsFromArrowSchema.empty())
621 : {
622 0 : CPLError(CE_Failure, CPLE_NotSupported,
623 : "Cannot mix calls to CreateField() and "
624 : "CreateFieldFromArrowSchema()");
625 0 : return OGRERR_FAILURE;
626 : }
627 570 : m_poFeatureDefn->AddFieldDefn(poField);
628 570 : return OGRERR_NONE;
629 : }
630 :
631 : /************************************************************************/
632 : /* OGRLayer::CreateFieldFromArrowSchema() */
633 : /************************************************************************/
634 :
635 842 : inline bool OGRArrowWriterLayer::CreateFieldFromArrowSchema(
636 : const struct ArrowSchema *schema, CSLConstList /*papszOptions*/)
637 : {
638 842 : if (m_poSchema)
639 : {
640 0 : CPLError(CE_Failure, CPLE_NotSupported,
641 : "Cannot add field after a first feature has been written");
642 0 : return false;
643 : }
644 :
645 842 : if (m_poFeatureDefn->GetFieldCount())
646 : {
647 0 : CPLError(CE_Failure, CPLE_NotSupported,
648 : "Cannot mix calls to CreateField() and "
649 : "CreateFieldFromArrowSchema()");
650 0 : return false;
651 : }
652 :
653 842 : if (m_osFIDColumn == schema->name)
654 : {
655 0 : CPLError(CE_Failure, CPLE_AppDefined,
656 : "FID column has the same name as this field: %s",
657 0 : schema->name);
658 0 : return false;
659 : }
660 :
661 29789 : for (auto &apoField : m_apoFieldsFromArrowSchema)
662 : {
663 28947 : if (apoField->name() == schema->name)
664 : {
665 0 : CPLError(CE_Failure, CPLE_AppDefined,
666 0 : "Field of name %s already exists", schema->name);
667 0 : return false;
668 : }
669 : }
670 :
671 842 : if (m_poFeatureDefn->GetGeomFieldIndex(schema->name) >= 0)
672 : {
673 0 : CPLError(CE_Failure, CPLE_AppDefined,
674 0 : "Geometry field of name %s already exists", schema->name);
675 0 : return false;
676 : }
677 :
678 : // ImportField() would release the schema, but we don't want that
679 : // So copy the structure content into a local variable, and override its
680 : // release callback to a no-op. This may be a bit fragile, but it doesn't
681 : // look like ImportField implementation tries to access the C ArrowSchema
682 : // after it has been called.
683 842 : struct ArrowSchema lSchema = *schema;
684 842 : const auto DummyFreeSchema = [](struct ArrowSchema *ptrSchema)
685 842 : { ptrSchema->release = nullptr; };
686 842 : lSchema.release = DummyFreeSchema;
687 1684 : auto result = arrow::ImportField(&lSchema);
688 842 : CPLAssert(lSchema.release == nullptr);
689 842 : if (!result.ok())
690 : {
691 0 : CPLError(CE_Failure, CPLE_AppDefined,
692 : "CreateFieldFromArrowSchema() failed");
693 0 : return false;
694 : }
695 842 : m_apoFieldsFromArrowSchema.emplace_back(std::move(*result));
696 842 : return true;
697 : }
698 :
699 : /************************************************************************/
700 : /* GetPreciseArrowGeomEncoding() */
701 : /************************************************************************/
702 :
703 149 : inline OGRArrowGeomEncoding OGRArrowWriterLayer::GetPreciseArrowGeomEncoding(
704 : OGRArrowGeomEncoding eEncodingType, OGRwkbGeometryType eGType)
705 : {
706 149 : CPLAssert(eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC ||
707 : eEncodingType == OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC);
708 149 : const auto eFlatType = wkbFlatten(eGType);
709 149 : if (eFlatType == wkbPoint)
710 : {
711 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
712 30 : ? OGRArrowGeomEncoding::GEOARROW_FSL_POINT
713 30 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT;
714 : }
715 119 : else if (eFlatType == wkbLineString)
716 : {
717 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
718 21 : ? OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING
719 21 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING;
720 : }
721 98 : else if (eFlatType == wkbPolygon)
722 : {
723 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
724 27 : ? OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON
725 27 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON;
726 : }
727 71 : else if (eFlatType == wkbMultiPoint)
728 : {
729 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
730 21 : ? OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT
731 21 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT;
732 : }
733 50 : else if (eFlatType == wkbMultiLineString)
734 : {
735 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
736 21 : ? OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING
737 21 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING;
738 : }
739 29 : else if (eFlatType == wkbMultiPolygon)
740 : {
741 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
742 27 : ? OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON
743 27 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON;
744 : }
745 : else
746 : {
747 2 : CPLError(CE_Failure, CPLE_NotSupported,
748 : "GeoArrow encoding is currently not supported for %s",
749 : OGRGeometryTypeToName(eGType));
750 2 : return eEncodingType;
751 : }
752 : }
753 :
754 : /************************************************************************/
755 : /* GetGeomEncodingAsString() */
756 : /************************************************************************/
757 :
758 : inline const char *
759 552 : OGRArrowWriterLayer::GetGeomEncodingAsString(OGRArrowGeomEncoding eGeomEncoding,
760 : bool bForParquetGeo)
761 : {
762 552 : switch (eGeomEncoding)
763 : {
764 175 : case OGRArrowGeomEncoding::WKB:
765 175 : return bForParquetGeo ? "WKB" : "geoarrow.wkb";
766 111 : case OGRArrowGeomEncoding::WKT:
767 111 : return bForParquetGeo ? "WKT" : "geoarrow.wkt";
768 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
769 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
770 0 : CPLAssert(false);
771 : break;
772 16 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
773 16 : return "geoarrow.point";
774 16 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
775 16 : return "geoarrow.linestring";
776 18 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
777 18 : return "geoarrow.polygon";
778 16 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
779 16 : return "geoarrow.multipoint";
780 16 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
781 16 : return "geoarrow.multilinestring";
782 18 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
783 18 : return "geoarrow.multipolygon";
784 43 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
785 43 : return bForParquetGeo ? "point" : "geoarrow.point";
786 23 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
787 23 : return bForParquetGeo ? "linestring" : "geoarrow.linestring";
788 27 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
789 27 : return bForParquetGeo ? "polygon" : "geoarrow.polygon";
790 23 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
791 23 : return bForParquetGeo ? "multipoint" : "geoarrow.multipoint";
792 23 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
793 23 : return bForParquetGeo ? "multilinestring"
794 23 : : "geoarrow.multilinestring";
795 27 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
796 27 : return bForParquetGeo ? "multipolygon" : "geoarrow.multipolygon";
797 : }
798 0 : return nullptr;
799 : }
800 :
801 : /************************************************************************/
802 : /* CreateGeomField() */
803 : /************************************************************************/
804 :
805 : inline OGRErr
806 27 : OGRArrowWriterLayer::CreateGeomField(const OGRGeomFieldDefn *poField,
807 : int /* bApproxOK */)
808 : {
809 27 : if (m_poSchema)
810 : {
811 1 : CPLError(CE_Failure, CPLE_NotSupported,
812 : "Cannot add field after a first feature has been written");
813 1 : return OGRERR_FAILURE;
814 : }
815 26 : const auto eGType = poField->GetType();
816 26 : if (!IsSupportedGeometryType(eGType))
817 : {
818 0 : return OGRERR_FAILURE;
819 : }
820 :
821 26 : if (IsSRSRequired() && poField->GetSpatialRef() == nullptr)
822 : {
823 0 : CPLError(CE_Warning, CPLE_AppDefined,
824 : "Geometry column should have an associated CRS");
825 : }
826 26 : auto eGeomEncoding = m_eGeomEncoding;
827 26 : if (eGeomEncoding == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC ||
828 26 : eGeomEncoding == OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC)
829 : {
830 0 : const auto eEncodingType = eGeomEncoding;
831 0 : eGeomEncoding = GetPreciseArrowGeomEncoding(eEncodingType, eGType);
832 0 : if (eGeomEncoding == eEncodingType)
833 0 : return OGRERR_FAILURE;
834 : }
835 26 : m_aeGeomEncoding.push_back(eGeomEncoding);
836 26 : m_poFeatureDefn->AddGeomFieldDefn(poField);
837 26 : return OGRERR_NONE;
838 : }
839 :
840 : /************************************************************************/
841 : /* MakeGeoArrowBuilder() */
842 : /************************************************************************/
843 :
844 : static std::shared_ptr<arrow::ArrayBuilder>
845 134 : MakeGeoArrowBuilder(arrow::MemoryPool *poMemoryPool, int nDim, int nDepth)
846 : {
847 134 : if (nDepth == 0)
848 104 : return std::make_shared<arrow::FixedSizeListBuilder>(
849 104 : poMemoryPool, std::make_shared<arrow::DoubleBuilder>(poMemoryPool),
850 52 : nDim);
851 : else
852 164 : return std::make_shared<arrow::ListBuilder>(
853 246 : poMemoryPool, MakeGeoArrowBuilder(poMemoryPool, nDim, nDepth - 1));
854 : }
855 :
856 : /************************************************************************/
857 : /* MakeGeoArrowStructBuilder() */
858 : /************************************************************************/
859 :
860 : static std::shared_ptr<arrow::ArrayBuilder>
861 236 : MakeGeoArrowStructBuilder(arrow::MemoryPool *poMemoryPool, int nDim, int nDepth,
862 : const std::shared_ptr<arrow::DataType> &eBaseType)
863 : {
864 236 : if (nDepth == 0)
865 : {
866 99 : std::vector<std::shared_ptr<arrow::ArrayBuilder>> builders;
867 345 : for (int i = 0; i < nDim; ++i)
868 : builders.emplace_back(
869 246 : std::make_shared<arrow::DoubleBuilder>(poMemoryPool));
870 198 : return std::make_shared<arrow::StructBuilder>(eBaseType, poMemoryPool,
871 198 : std::move(builders));
872 : }
873 : else
874 274 : return std::make_shared<arrow::ListBuilder>(
875 274 : poMemoryPool, MakeGeoArrowStructBuilder(poMemoryPool, nDim,
876 137 : nDepth - 1, eBaseType));
877 : }
878 :
879 : /************************************************************************/
880 : /* ClearArrayBuilers() */
881 : /************************************************************************/
882 :
883 205 : inline void OGRArrowWriterLayer::ClearArrayBuilers()
884 : {
885 205 : m_apoBuilders.clear();
886 205 : m_apoBuildersBBOXStruct.clear();
887 205 : m_apoBuildersBBOXXMin.clear();
888 205 : m_apoBuildersBBOXYMin.clear();
889 205 : m_apoBuildersBBOXXMax.clear();
890 205 : m_apoBuildersBBOXYMax.clear();
891 205 : }
892 :
893 : /************************************************************************/
894 : /* CreateArrayBuilders() */
895 : /************************************************************************/
896 :
897 322 : inline void OGRArrowWriterLayer::CreateArrayBuilders()
898 : {
899 322 : m_apoBuilders.reserve(1 + m_poFeatureDefn->GetFieldCount() +
900 322 : m_poFeatureDefn->GetGeomFieldCount());
901 :
902 322 : int nArrowIdx = 0;
903 322 : if (!m_osFIDColumn.empty())
904 : {
905 49 : m_apoBuilders.emplace_back(std::make_shared<arrow::Int64Builder>());
906 49 : nArrowIdx++;
907 : }
908 :
909 1606 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i, ++nArrowIdx)
910 : {
911 1284 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
912 1284 : std::shared_ptr<arrow::ArrayBuilder> builder;
913 1284 : const auto eSubDT = poFieldDefn->GetSubType();
914 1284 : switch (poFieldDefn->GetType())
915 : {
916 145 : case OFTInteger:
917 145 : if (eSubDT == OFSTBoolean)
918 : builder =
919 12 : std::make_shared<arrow::BooleanBuilder>(m_poMemoryPool);
920 133 : else if (eSubDT == OFSTInt16)
921 : builder =
922 12 : std::make_shared<arrow::Int16Builder>(m_poMemoryPool);
923 : else
924 : builder =
925 121 : std::make_shared<arrow::Int32Builder>(m_poMemoryPool);
926 145 : break;
927 :
928 74 : case OFTInteger64:
929 74 : builder = std::make_shared<arrow::Int64Builder>(m_poMemoryPool);
930 74 : break;
931 :
932 106 : case OFTReal:
933 : {
934 212 : const auto arrowType = m_poSchema->fields()[nArrowIdx]->type();
935 106 : if (arrowType->id() == arrow::Type::DECIMAL128)
936 24 : builder = std::make_shared<arrow::Decimal128Builder>(
937 24 : arrowType, m_poMemoryPool);
938 82 : else if (arrowType->id() == arrow::Type::DECIMAL256)
939 0 : builder = std::make_shared<arrow::Decimal256Builder>(
940 0 : arrowType, m_poMemoryPool);
941 82 : else if (eSubDT == OFSTFloat32)
942 : builder =
943 21 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool);
944 : else
945 : builder =
946 61 : std::make_shared<arrow::DoubleBuilder>(m_poMemoryPool);
947 106 : break;
948 : }
949 :
950 410 : case OFTString:
951 : case OFTWideString:
952 : builder =
953 410 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool);
954 410 : break;
955 :
956 38 : case OFTBinary:
957 38 : if (poFieldDefn->GetWidth() != 0)
958 24 : builder = std::make_shared<arrow::FixedSizeBinaryBuilder>(
959 24 : arrow::fixed_size_binary(poFieldDefn->GetWidth()),
960 24 : m_poMemoryPool);
961 : else
962 : builder =
963 26 : std::make_shared<arrow::BinaryBuilder>(m_poMemoryPool);
964 38 : break;
965 :
966 144 : case OFTIntegerList:
967 : {
968 144 : std::shared_ptr<arrow::ArrayBuilder> poBaseBuilder;
969 144 : if (eSubDT == OFSTBoolean)
970 : poBaseBuilder =
971 24 : std::make_shared<arrow::BooleanBuilder>(m_poMemoryPool);
972 120 : else if (eSubDT == OFSTInt16)
973 : poBaseBuilder =
974 0 : std::make_shared<arrow::Int16Builder>(m_poMemoryPool);
975 : else
976 : poBaseBuilder =
977 120 : std::make_shared<arrow::Int32Builder>(m_poMemoryPool);
978 288 : builder = std::make_shared<arrow::ListBuilder>(m_poMemoryPool,
979 144 : poBaseBuilder);
980 144 : break;
981 : }
982 :
983 60 : case OFTInteger64List:
984 60 : builder = std::make_shared<arrow::ListBuilder>(
985 60 : m_poMemoryPool,
986 180 : std::make_shared<arrow::Int64Builder>(m_poMemoryPool));
987 :
988 60 : break;
989 :
990 105 : case OFTRealList:
991 105 : if (eSubDT == OFSTFloat32)
992 33 : builder = std::make_shared<arrow::ListBuilder>(
993 33 : m_poMemoryPool,
994 99 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
995 : else
996 72 : builder = std::make_shared<arrow::ListBuilder>(
997 72 : m_poMemoryPool,
998 216 : std::make_shared<arrow::DoubleBuilder>(m_poMemoryPool));
999 105 : break;
1000 :
1001 36 : case OFTStringList:
1002 : case OFTWideStringList:
1003 36 : builder = std::make_shared<arrow::ListBuilder>(
1004 36 : m_poMemoryPool,
1005 108 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool));
1006 :
1007 36 : break;
1008 :
1009 47 : case OFTDate:
1010 : builder =
1011 47 : std::make_shared<arrow::Date32Builder>(m_poMemoryPool);
1012 47 : break;
1013 :
1014 24 : case OFTTime:
1015 48 : builder = std::make_shared<arrow::Time32Builder>(
1016 72 : arrow::time32(arrow::TimeUnit::MILLI), m_poMemoryPool);
1017 24 : break;
1018 :
1019 95 : case OFTDateTime:
1020 190 : builder = std::make_shared<arrow::TimestampBuilder>(
1021 285 : arrow::timestamp(arrow::TimeUnit::MILLI), m_poMemoryPool);
1022 95 : break;
1023 : }
1024 1284 : m_apoBuilders.emplace_back(builder);
1025 : }
1026 :
1027 644 : for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i, ++nArrowIdx)
1028 : {
1029 322 : std::shared_ptr<arrow::ArrayBuilder> builder;
1030 322 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
1031 322 : const auto eGType = poGeomFieldDefn->GetType();
1032 : const int nDim =
1033 322 : 2 + (OGR_GT_HasZ(eGType) ? 1 : 0) + (OGR_GT_HasM(eGType) ? 1 : 0);
1034 :
1035 322 : switch (m_aeGeomEncoding[i])
1036 : {
1037 118 : case OGRArrowGeomEncoding::WKB:
1038 : builder =
1039 118 : std::make_shared<arrow::BinaryBuilder>(m_poMemoryPool);
1040 118 : break;
1041 :
1042 53 : case OGRArrowGeomEncoding::WKT:
1043 : builder =
1044 53 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool);
1045 53 : break;
1046 :
1047 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
1048 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 0);
1049 8 : break;
1050 :
1051 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
1052 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 1);
1053 8 : break;
1054 :
1055 10 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
1056 10 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 2);
1057 10 : break;
1058 :
1059 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
1060 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 1);
1061 8 : break;
1062 :
1063 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
1064 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 2);
1065 8 : break;
1066 :
1067 10 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
1068 10 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 3);
1069 10 : break;
1070 :
1071 26 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
1072 52 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 0,
1073 52 : m_apoBaseStructGeomType[i]);
1074 26 : break;
1075 :
1076 13 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
1077 26 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 1,
1078 26 : m_apoBaseStructGeomType[i]);
1079 13 : break;
1080 :
1081 17 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
1082 34 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 2,
1083 34 : m_apoBaseStructGeomType[i]);
1084 17 : break;
1085 :
1086 13 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
1087 26 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 1,
1088 26 : m_apoBaseStructGeomType[i]);
1089 13 : break;
1090 :
1091 13 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
1092 26 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 2,
1093 26 : m_apoBaseStructGeomType[i]);
1094 13 : break;
1095 :
1096 17 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
1097 34 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 3,
1098 34 : m_apoBaseStructGeomType[i]);
1099 17 : break;
1100 :
1101 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
1102 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
1103 0 : CPLAssert(false);
1104 : break;
1105 : }
1106 :
1107 322 : m_apoBuilders.emplace_back(builder);
1108 :
1109 322 : if (m_bWriteBBoxStruct)
1110 : {
1111 : m_apoBuildersBBOXXMin.emplace_back(
1112 152 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1113 : m_apoBuildersBBOXYMin.emplace_back(
1114 152 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1115 : m_apoBuildersBBOXXMax.emplace_back(
1116 152 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1117 : m_apoBuildersBBOXYMax.emplace_back(
1118 152 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1119 : m_apoBuildersBBOXStruct.emplace_back(
1120 304 : std::make_shared<arrow::StructBuilder>(
1121 152 : m_apoFieldsBBOX[i]->type(), m_poMemoryPool,
1122 1368 : std::vector<std::shared_ptr<arrow::ArrayBuilder>>{
1123 152 : m_apoBuildersBBOXXMin.back(),
1124 152 : m_apoBuildersBBOXYMin.back(),
1125 152 : m_apoBuildersBBOXXMax.back(),
1126 1064 : m_apoBuildersBBOXYMax.back()}));
1127 : }
1128 : }
1129 322 : }
1130 :
1131 : /************************************************************************/
1132 : /* castToFloatDown() */
1133 : /************************************************************************/
1134 :
1135 : // Cf https://github.com/sqlite/sqlite/blob/90e4a3b7fcdf63035d6f35eb44d11ff58ff4b068/ext/rtree/rtree.c#L2993C1-L2995C3
/*
** Rounding constants for double->float conversion.
** 8388608 is 2^23, so multiplying by them moves a value by a little more than
** one float step.
*/
#define RNDTOWARDS (1.0 - 1.0 / 8388608.0) /* Round towards zero */
#define RNDAWAY (1.0 + 1.0 / 8388608.0)    /* Round away from zero */

/**
 * Convert a double to a float that is never greater than the input.
 *
 * Used for the minimum bounds of the bounding box, so that the float bounding
 * box always contains the double precision one.
 *
 * - A finite value above the float range returns the largest finite float.
 * - A value below the float range returns -infinity.
 * - +infinity and NaN are returned unchanged.
 */
static float castToFloatDown(double d)
{
    constexpr float fMax = std::numeric_limits<float>::max();
    if (d > fMax)
    {
        // Do not convert an out-of-range value. +inf is the only input for
        // which +inf is an acceptable lower bound.
        return d > std::numeric_limits<double>::max()
                   ? std::numeric_limits<float>::infinity()
                   : fMax;
    }
    if (d < -fMax)
    {
        return -std::numeric_limits<float>::infinity();
    }

    float f = static_cast<float>(d);
    if (f > d)
    {
        f = static_cast<float>(d * (d < 0 ? RNDAWAY : RNDTOWARDS));
        if (f > d)
        {
            // Only reached in the subnormal range, where a relative change of
            // 2^-23 is smaller than the float spacing. That spacing is
            // denorm_min, so this steps to the previous float exactly.
            f -= std::numeric_limits<float>::denorm_min();
        }
    }
    return f;
}

/**
 * Convert a double to a float that is never smaller than the input.
 *
 * Used for the maximum bounds of the bounding box, so that the float bounding
 * box always contains the double precision one.
 *
 * - A finite value below the float range returns the lowest finite float.
 * - A value above the float range returns +infinity.
 * - -infinity and NaN are returned unchanged.
 */
static float castToFloatUp(double d)
{
    constexpr float fMax = std::numeric_limits<float>::max();
    if (d < -fMax)
    {
        // Do not convert an out-of-range value. -inf is the only input for
        // which -inf is an acceptable upper bound.
        return d < std::numeric_limits<double>::lowest()
                   ? -std::numeric_limits<float>::infinity()
                   : -fMax;
    }
    if (d > fMax)
    {
        return std::numeric_limits<float>::infinity();
    }

    float f = static_cast<float>(d);
    if (f < d)
    {
        f = static_cast<float>(d * (d < 0 ? RNDTOWARDS : RNDAWAY));
        if (f < d)
        {
            // Subnormal range: step to the next float exactly (see
            // castToFloatDown()).
            f += std::numeric_limits<float>::denorm_min();
        }
    }
    return f;
}
1165 :
1166 : /************************************************************************/
1167 : /* GeoArrowLineBuilder() */
1168 : /************************************************************************/
1169 :
1170 : template <class PointBuilderType>
1171 356 : static OGRErr GeoArrowLineBuilder(const OGRLineString *poLS,
1172 : PointBuilderType *poPointBuilder,
1173 : arrow::DoubleBuilder *poXBuilder,
1174 : arrow::DoubleBuilder *poYBuilder,
1175 : arrow::DoubleBuilder *poZBuilder,
1176 : arrow::DoubleBuilder *poMBuilder)
1177 : {
1178 1644 : for (int j = 0; j < poLS->getNumPoints(); ++j)
1179 : {
1180 1288 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1181 1288 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poXBuilder->Append(poLS->getX(j)));
1182 1288 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poYBuilder->Append(poLS->getY(j)));
1183 1288 : if (poZBuilder)
1184 412 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poZBuilder->Append(poLS->getZ(j)));
1185 1288 : if (poMBuilder)
1186 220 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMBuilder->Append(poLS->getM(j)));
1187 : }
1188 356 : return OGRERR_NONE;
1189 : }
1190 :
1191 : /************************************************************************/
1192 : /* BuildGeometry() */
1193 : /************************************************************************/
1194 :
// Append the geometry poGeom (which may be null) of geometry field iGeomField
// to poBuilder, using the encoding stored in m_aeGeomEncoding[iGeomField].
//
// Side effects visible in this function:
// - for a non-empty geometry, its extent is merged into
//   m_aoEnvelopes[iGeomField] and its geometry type is inserted into
//   m_oSetWrittenGeometryTypes[iGeomField];
// - when m_bWriteBBoxStruct is set, one entry (values or null) is appended to
//   the bounding box builders of the field.
//
// Returns OGRERR_NONE when this function runs to completion. The
// OGR_ARROW_RETURN_OGRERR_NOT_OK / OGR_ARROW_PROPAGATE_OGRERR macros are
// defined outside this block; presumably they return early with an error code
// when an Arrow call or a nested call fails (to be confirmed against their
// definition).
1195 3312 : inline OGRErr OGRArrowWriterLayer::BuildGeometry(OGRGeometry *poGeom,
1196 : int iGeomField,
1197 : arrow::ArrayBuilder *poBuilder)
1198 : {
1199 3312 : const auto eGType = poGeom ? poGeom->getGeometryType() : wkbNone;
1200 : const auto eColumnGType =
1201 3312 : m_poFeatureDefn->GetGeomFieldDefn(iGeomField)->GetType();
// Z/M presence is taken from the declared column type, not from the geometry.
1202 3312 : const bool bHasZ = CPL_TO_BOOL(OGR_GT_HasZ(eColumnGType));
1203 3312 : const bool bHasM = CPL_TO_BOOL(OGR_GT_HasM(eColumnGType));
1204 3312 : const bool bIsEmpty = poGeom != nullptr && poGeom->IsEmpty();
1205 3312 : OGREnvelope3D oEnvelope;
// Statistics are only updated for non-null, non-empty geometries.
1206 3312 : if (poGeom != nullptr && !bIsEmpty)
1207 : {
1208 1928 : if (poGeom->Is3D())
1209 : {
1210 210 : poGeom->getEnvelope(&oEnvelope);
1211 210 : m_aoEnvelopes[iGeomField].Merge(oEnvelope);
1212 : }
1213 : else
1214 : {
// 2D geometry: only the 2D part of oEnvelope is filled.
1215 1718 : poGeom->getEnvelope(static_cast<OGREnvelope *>(&oEnvelope));
1216 1718 : m_aoEnvelopes[iGeomField].Merge(oEnvelope);
1217 : }
1218 1928 : m_oSetWrittenGeometryTypes[iGeomField].insert(eGType);
1219 : }
1220 :
// Bounding box column: minimums are converted with castToFloatDown() and
// maximums with castToFloatUp(). Null and empty geometries get a null entry.
1221 3312 : if (m_bWriteBBoxStruct)
1222 : {
1223 2735 : if (poGeom && !bIsEmpty)
1224 : {
1225 1607 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1226 : m_apoBuildersBBOXXMin[iGeomField]->Append(
1227 : castToFloatDown(oEnvelope.MinX)));
1228 1607 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1229 : m_apoBuildersBBOXYMin[iGeomField]->Append(
1230 : castToFloatDown(oEnvelope.MinY)));
1231 1607 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1232 : m_apoBuildersBBOXXMax[iGeomField]->Append(
1233 : castToFloatUp(oEnvelope.MaxX)));
1234 1607 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1235 : m_apoBuildersBBOXYMax[iGeomField]->Append(
1236 : castToFloatUp(oEnvelope.MaxY)));
1237 1607 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1238 : m_apoBuildersBBOXStruct[iGeomField]->Append());
1239 : }
1240 : else
1241 : {
1242 1128 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1243 : m_apoBuildersBBOXStruct[iGeomField]->AppendNull());
1244 : }
1245 : }
1246 :
// Null geometry: append a null, except for FixedSizeList points written by the
// PARQUET driver, which get a point made of NaN coordinates instead.
1247 3312 : if (poGeom == nullptr)
1248 : {
1249 3654 : if (m_aeGeomEncoding[iGeomField] ==
1250 1226 : OGRArrowGeomEncoding::GEOARROW_FSL_POINT &&
1251 1226 : GetDriverUCName() == "PARQUET")
1252 : {
1253 : // For some reason, Parquet doesn't support a NULL FixedSizeList
1254 : // on reading
1255 4 : auto poPointBuilder =
1256 : static_cast<arrow::FixedSizeListBuilder *>(poBuilder);
1257 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1258 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1259 4 : poPointBuilder->value_builder());
1260 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1261 : std::numeric_limits<double>::quiet_NaN()));
1262 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1263 : std::numeric_limits<double>::quiet_NaN()));
1264 4 : if (bHasZ)
1265 2 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1266 : std::numeric_limits<double>::quiet_NaN()));
1267 4 : if (bHasM)
1268 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1269 : std::numeric_limits<double>::quiet_NaN()));
1270 : }
1271 : else
1272 : {
1273 1214 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1274 : }
1275 :
1276 1218 : return OGRERR_NONE;
1277 : }
1278 :
// For GeoArrow encodings the geometry type must match the column type: exactly
// for non-empty geometries, after wkbFlatten() for empty ones. Otherwise a
// warning is emitted and a null is written. Note that the envelope, geometry
// type set and bounding box entry above have already been updated by then.
1279 : // The following checks are only valid for GeoArrow encoding
1280 2880 : if (m_aeGeomEncoding[iGeomField] != OGRArrowGeomEncoding::WKB &&
1281 786 : m_aeGeomEncoding[iGeomField] != OGRArrowGeomEncoding::WKT)
1282 : {
1283 610 : if ((!bIsEmpty && eGType != eColumnGType) ||
1284 132 : (bIsEmpty && wkbFlatten(eGType) != wkbFlatten(eColumnGType)))
1285 : {
1286 6 : CPLError(CE_Warning, CPLE_AppDefined,
1287 : "Geometry of type %s found, whereas %s is expected. "
1288 : "Writing null geometry",
1289 : OGRGeometryTypeToName(eGType),
1290 : OGRGeometryTypeToName(eColumnGType));
1291 6 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1292 :
1293 6 : return OGRERR_NONE;
1294 : }
1295 : }
1296 :
// Encoding-specific serialization. In every case poBuilder is cast to the
// builder type that CreateArrayBuilders() creates for that encoding.
1297 2088 : switch (m_aeGeomEncoding[iGeomField])
1298 : {
1299 1308 : case OGRArrowGeomEncoding::WKB:
1300 : {
// If the geometry has M but the column has not, a clone without M is written.
// The warning is emitted only once (function-level static flag).
1301 0 : std::unique_ptr<OGRGeometry> poGeomModified;
1302 1308 : if (OGR_GT_HasM(eGType) && !OGR_GT_HasM(eColumnGType))
1303 : {
1304 : static bool bHasWarned = false;
1305 0 : if (!bHasWarned)
1306 : {
1307 0 : CPLError(CE_Warning, CPLE_AppDefined,
1308 : "Removing M component from geometry");
1309 0 : bHasWarned = true;
1310 : }
1311 0 : poGeomModified.reset(poGeom->clone());
1312 0 : poGeomModified->setMeasured(false);
1313 0 : poGeom = poGeomModified.get();
1314 : }
1315 1308 : FixupGeometryBeforeWriting(poGeom);
// The size is passed to Append() as an int, hence the INT_MAX check. Larger
// geometries are written as null with a warning.
1316 1308 : const auto nSize = poGeom->WkbSize();
1317 1308 : if (nSize < INT_MAX)
1318 : {
// m_abyBuffer is a member buffer reused as scratch space for the ISO WKB
// (little-endian) export.
1319 1308 : m_abyBuffer.resize(nSize);
1320 1308 : poGeom->exportToWkb(wkbNDR, &m_abyBuffer[0], wkbVariantIso);
1321 1308 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1322 : static_cast<arrow::BinaryBuilder *>(poBuilder)->Append(
1323 : m_abyBuffer.data(),
1324 : static_cast<int>(m_abyBuffer.size())));
1325 : }
1326 : else
1327 : {
1328 0 : CPLError(CE_Warning, CPLE_AppDefined,
1329 : "Too big geometry. "
1330 : "Writing null geometry");
1331 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1332 : }
1333 1308 : break;
1334 : }
1335 :
1336 308 : case OGRArrowGeomEncoding::WKT:
1337 : {
// ISO WKT. A non-negative m_nWKTCoordinatePrecision selects the fixed format
// with that precision for the X/Y, Z and M values.
1338 308 : OGRWktOptions options;
1339 308 : options.variant = wkbVariantIso;
1340 308 : if (m_nWKTCoordinatePrecision >= 0)
1341 : {
1342 0 : options.format = OGRWktFormat::F;
1343 0 : options.xyPrecision = m_nWKTCoordinatePrecision;
1344 0 : options.zPrecision = m_nWKTCoordinatePrecision;
1345 0 : options.mPrecision = m_nWKTCoordinatePrecision;
1346 : }
1347 308 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1348 : static_cast<arrow::StringBuilder *>(poBuilder)->Append(
1349 : poGeom->exportToWkt(options)));
1350 308 : break;
1351 : }
1352 :
1353 20 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
1354 : {
// FixedSizeList point: the coordinates are appended one after the other to the
// single value builder, in X, Y, [Z], [M] order. An empty point is written as
// NaN coordinates.
1355 20 : const auto poPoint = poGeom->toPoint();
1356 20 : auto poPointBuilder =
1357 : static_cast<arrow::FixedSizeListBuilder *>(poBuilder);
1358 20 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1359 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1360 20 : poPointBuilder->value_builder());
1361 20 : if (bIsEmpty)
1362 : {
1363 8 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1364 : std::numeric_limits<double>::quiet_NaN()));
1365 8 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1366 : std::numeric_limits<double>::quiet_NaN()));
1367 8 : if (bHasZ)
1368 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1369 : std::numeric_limits<double>::quiet_NaN()));
1370 8 : if (bHasM)
1371 2 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1372 : std::numeric_limits<double>::quiet_NaN()));
1373 : }
1374 : else
1375 : {
1376 12 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1377 : poValueBuilder->Append(poPoint->getX()));
1378 12 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1379 : poValueBuilder->Append(poPoint->getY()));
1380 12 : if (bHasZ)
1381 6 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1382 : poValueBuilder->Append(poPoint->getZ()));
1383 12 : if (bHasM)
1384 2 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1385 : poValueBuilder->Append(poPoint->getM()));
1386 : }
1387 20 : break;
1388 : }
1389 :
// Helper macro for the struct encodings. From a StructBuilder it declares the
// locals poXBuilder and poYBuilder (fields 0 and 1), then poZBuilder and
// poMBuilder, which are taken from the following fields when bHasZ / bHasM are
// set and left null otherwise. The trailing empty do/while lets the invocation
// end with a semicolon.
1390 : #define GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder) \
1391 : auto poXBuilder = \
1392 : static_cast<arrow::DoubleBuilder *>(poPointBuilder->field_builder(0)); \
1393 : auto poYBuilder = \
1394 : static_cast<arrow::DoubleBuilder *>(poPointBuilder->field_builder(1)); \
1395 : int iSubField = 2; \
1396 : arrow::DoubleBuilder *poZBuilder = nullptr; \
1397 : if (bHasZ) \
1398 : { \
1399 : poZBuilder = static_cast<arrow::DoubleBuilder *>( \
1400 : poPointBuilder->field_builder(iSubField)); \
1401 : ++iSubField; \
1402 : } \
1403 : arrow::DoubleBuilder *poMBuilder = nullptr; \
1404 : if (bHasM) \
1405 : { \
1406 : poMBuilder = static_cast<arrow::DoubleBuilder *>( \
1407 : poPointBuilder->field_builder(iSubField)); \
1408 : } \
1409 : do \
1410 : { \
1411 : } while (0)
1412 :
1413 61 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
1414 : {
// Struct point: each coordinate goes to its own field builder. An empty point
// is written as NaN coordinates.
1415 61 : const auto poPoint = poGeom->toPoint();
1416 61 : auto poPointBuilder =
1417 : static_cast<arrow::StructBuilder *>(poBuilder);
1418 61 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1419 61 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1420 :
1421 61 : if (bIsEmpty)
1422 : {
1423 12 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poXBuilder->Append(
1424 : std::numeric_limits<double>::quiet_NaN()));
1425 12 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poYBuilder->Append(
1426 : std::numeric_limits<double>::quiet_NaN()));
1427 : }
1428 : else
1429 : {
1430 49 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1431 : poXBuilder->Append(poPoint->getX()));
1432 49 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1433 : poYBuilder->Append(poPoint->getY()));
1434 : }
1435 61 : if (poZBuilder)
1436 : {
1437 16 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poZBuilder->Append(
1438 : bIsEmpty ? std::numeric_limits<double>::quiet_NaN()
1439 : : poPoint->getZ()));
1440 : }
1441 61 : if (poMBuilder)
1442 : {
1443 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMBuilder->Append(
1444 : bIsEmpty ? std::numeric_limits<double>::quiet_NaN()
1445 : : poPoint->getM()));
1446 : }
1447 61 : break;
1448 : }
1449 :
// In all the FixedSizeList (FSL) cases below, the same poValueBuilder is passed
// to GeoArrowLineBuilder() as the X, Y, Z and M builder, so that the
// coordinates of a point are appended consecutively to the single child
// builder. Z and M are only passed when bHasZ / bHasM are set.
1450 20 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
1451 : {
1452 20 : const auto poLS = poGeom->toLineString();
1453 20 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1454 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1455 20 : poListBuilder->value_builder());
1456 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1457 20 : poPointBuilder->value_builder());
1458 :
1459 20 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1460 20 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1461 : poLS, poPointBuilder, poValueBuilder, poValueBuilder,
1462 : bHasZ ? poValueBuilder : nullptr,
1463 : bHasM ? poValueBuilder : nullptr));
1464 20 : break;
1465 : }
1466 :
1467 33 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
1468 : {
1469 33 : const auto poLS = poGeom->toLineString();
1470 33 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1471 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1472 33 : poListBuilder->value_builder());
1473 33 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1474 :
1475 33 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1476 33 : OGR_ARROW_PROPAGATE_OGRERR(
1477 : GeoArrowLineBuilder(poLS, poPointBuilder, poXBuilder,
1478 : poYBuilder, poZBuilder, poMBuilder));
1479 33 : break;
1480 : }
1481 :
// Polygons: one list entry for the polygon, then one nested list entry per
// ring. An empty polygon only gets the outer Append().
1482 32 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
1483 : {
1484 32 : const auto poPolygon = poGeom->toPolygon();
1485 32 : auto poPolygonBuilder =
1486 : static_cast<arrow::ListBuilder *>(poBuilder);
1487 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1488 32 : poPolygonBuilder->value_builder());
1489 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1490 32 : poRingBuilder->value_builder());
1491 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1492 32 : poPointBuilder->value_builder());
1493 32 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolygonBuilder->Append());
1494 62 : for (const auto *poRing : *poPolygon)
1495 : {
1496 30 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1497 30 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1498 : poRing, poPointBuilder, poValueBuilder, poValueBuilder,
1499 : bHasZ ? poValueBuilder : nullptr,
1500 : bHasM ? poValueBuilder : nullptr));
1501 : }
1502 32 : break;
1503 : }
1504 :
1505 53 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
1506 : {
1507 53 : const auto poPolygon = poGeom->toPolygon();
1508 53 : auto poPolygonBuilder =
1509 : static_cast<arrow::ListBuilder *>(poBuilder);
1510 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1511 53 : poPolygonBuilder->value_builder());
1512 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1513 53 : poRingBuilder->value_builder());
1514 53 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1515 :
1516 53 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolygonBuilder->Append());
1517 102 : for (const auto *poRing : *poPolygon)
1518 : {
1519 49 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1520 49 : OGR_ARROW_PROPAGATE_OGRERR(
1521 : GeoArrowLineBuilder(poRing, poPointBuilder, poXBuilder,
1522 : poYBuilder, poZBuilder, poMBuilder));
1523 : }
1524 53 : break;
1525 : }
1526 :
// Multipoints: one list entry, then one point entry per member point.
1527 32 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
1528 : {
1529 32 : const auto poMultiPoint = poGeom->toMultiPoint();
1530 32 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1531 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1532 32 : poListBuilder->value_builder());
1533 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1534 32 : poPointBuilder->value_builder());
1535 32 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1536 88 : for (const auto *poPoint : *poMultiPoint)
1537 : {
1538 56 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1539 56 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1540 : poValueBuilder->Append(poPoint->getX()));
1541 56 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1542 : poValueBuilder->Append(poPoint->getY()));
1543 56 : if (bHasZ)
1544 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1545 : poValueBuilder->Append(poPoint->getZ()));
1546 56 : if (bHasM)
1547 18 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1548 : poValueBuilder->Append(poPoint->getM()));
1549 : }
1550 32 : break;
1551 : }
1552 :
1553 49 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
1554 : {
1555 49 : const auto poMultiPoint = poGeom->toMultiPoint();
1556 49 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1557 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1558 49 : poListBuilder->value_builder());
1559 49 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1560 :
1561 49 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1562 126 : for (const auto *poPoint : *poMultiPoint)
1563 : {
1564 77 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1565 77 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1566 : poXBuilder->Append(poPoint->getX()));
1567 77 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1568 : poYBuilder->Append(poPoint->getY()));
1569 77 : if (poZBuilder)
1570 38 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1571 : poZBuilder->Append(poPoint->getZ()));
1572 77 : if (poMBuilder)
1573 18 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1574 : poMBuilder->Append(poPoint->getM()));
1575 : }
1576 49 : break;
1577 : }
1578 :
// Multilinestrings: one list entry, then one nested list entry per member line
// string.
1579 28 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
1580 : {
1581 28 : const auto poMLS = poGeom->toMultiLineString();
1582 28 : auto poMLSBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1583 : auto poLSBuilder = static_cast<arrow::ListBuilder *>(
1584 28 : poMLSBuilder->value_builder());
1585 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1586 28 : poLSBuilder->value_builder());
1587 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1588 28 : poPointBuilder->value_builder());
1589 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMLSBuilder->Append());
1590 60 : for (const auto *poLS : *poMLS)
1591 : {
1592 32 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poLSBuilder->Append());
1593 32 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1594 : poLS, poPointBuilder, poValueBuilder, poValueBuilder,
1595 : bHasZ ? poValueBuilder : nullptr,
1596 : bHasM ? poValueBuilder : nullptr));
1597 : }
1598 28 : break;
1599 : }
1600 :
1601 45 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
1602 : {
1603 45 : const auto poMLS = poGeom->toMultiLineString();
1604 45 : auto poMLSBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1605 : auto poLSBuilder = static_cast<arrow::ListBuilder *>(
1606 45 : poMLSBuilder->value_builder());
1607 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1608 45 : poLSBuilder->value_builder());
1609 45 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1610 :
1611 45 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMLSBuilder->Append());
1612 98 : for (const auto *poLS : *poMLS)
1613 : {
1614 53 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poLSBuilder->Append());
1615 53 : OGR_ARROW_PROPAGATE_OGRERR(
1616 : GeoArrowLineBuilder(poLS, poPointBuilder, poXBuilder,
1617 : poYBuilder, poZBuilder, poMBuilder));
1618 : }
1619 45 : break;
1620 : }
1621 :
// Multipolygons: three list levels (multipolygon, polygon, ring) above the
// point builder.
1622 38 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
1623 : {
1624 38 : const auto poMPoly = poGeom->toMultiPolygon();
1625 38 : auto poMPolyBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1626 : auto poPolyBuilder = static_cast<arrow::ListBuilder *>(
1627 38 : poMPolyBuilder->value_builder());
1628 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1629 38 : poPolyBuilder->value_builder());
1630 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1631 38 : poRingBuilder->value_builder());
1632 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1633 38 : poPointBuilder->value_builder());
1634 38 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMPolyBuilder->Append());
1635 82 : for (const auto *poPolygon : *poMPoly)
1636 : {
1637 44 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolyBuilder->Append());
1638 98 : for (const auto *poRing : *poPolygon)
1639 : {
1640 54 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1641 54 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1642 : poRing, poPointBuilder, poValueBuilder, poValueBuilder,
1643 : bHasZ ? poValueBuilder : nullptr,
1644 : bHasM ? poValueBuilder : nullptr));
1645 : }
1646 : }
1647 38 : break;
1648 : }
1649 :
1650 61 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
1651 : {
1652 61 : const auto poMPoly = poGeom->toMultiPolygon();
1653 61 : auto poMPolyBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1654 : auto poPolyBuilder = static_cast<arrow::ListBuilder *>(
1655 61 : poMPolyBuilder->value_builder());
1656 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1657 61 : poPolyBuilder->value_builder());
1658 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1659 61 : poRingBuilder->value_builder());
1660 61 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1661 :
1662 61 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMPolyBuilder->Append());
1663 130 : for (const auto *poPolygon : *poMPoly)
1664 : {
1665 69 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolyBuilder->Append());
1666 154 : for (const auto *poRing : *poPolygon)
1667 : {
1668 85 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1669 85 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1670 : poRing, poPointBuilder, poXBuilder, poYBuilder,
1671 : poZBuilder, poMBuilder));
1672 : }
1673 : }
1674 61 : break;
1675 : }
1676 :
// Generic encodings are not expected at this point and are treated as a
// programming error.
1677 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
1678 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
1679 : {
1680 0 : CPLAssert(false);
1681 : break;
1682 : }
1683 : }
1684 :
1685 2088 : return OGRERR_NONE;
1686 : }
1687 :
1688 : /************************************************************************/
1689 : /* ICreateFeature() */
1690 : /************************************************************************/
1691 :
1692 2852 : inline OGRErr OGRArrowWriterLayer::ICreateFeature(OGRFeature *poFeature)
1693 : {
1694 2852 : if (m_poSchema == nullptr)
1695 : {
1696 166 : CreateSchema();
1697 : }
1698 :
1699 2852 : if (m_apoBuilders.empty())
1700 : {
1701 203 : if (!m_apoFieldsFromArrowSchema.empty())
1702 : {
1703 0 : CPLError(CE_Failure, CPLE_NotSupported,
1704 : "ICreateFeature() cannot be used after "
1705 : "CreateFieldFromArrowSchema()");
1706 0 : return OGRERR_FAILURE;
1707 : }
1708 203 : CreateArrayBuilders();
1709 : }
1710 :
1711 : // First pass to check not-null constraints as Arrow doesn't seem
1712 : // to do that on the writing side. But such files can't be read.
1713 2852 : const int nFieldCount = m_poFeatureDefn->GetFieldCount();
1714 7753 : for (int i = 0; i < nFieldCount; ++i)
1715 : {
1716 4902 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
1717 4904 : if (!poFieldDefn->IsNullable() &&
1718 2 : !poFeature->IsFieldSetAndNotNullUnsafe(i))
1719 : {
1720 1 : CPLError(CE_Failure, CPLE_AppDefined,
1721 : "Null value found in non-nullable field %s",
1722 : poFieldDefn->GetNameRef());
1723 1 : return OGRERR_FAILURE;
1724 : }
1725 : }
1726 :
1727 2851 : const int nGeomFieldCount = m_poFeatureDefn->GetGeomFieldCount();
1728 5814 : for (int i = 0; i < nGeomFieldCount; ++i)
1729 : {
1730 2963 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
1731 2995 : if (!poGeomFieldDefn->IsNullable() &&
1732 32 : poFeature->GetGeomFieldRef(i) == nullptr)
1733 : {
1734 0 : CPLError(CE_Failure, CPLE_AppDefined,
1735 : "Null value found in non-nullable geometry field %s",
1736 : poGeomFieldDefn->GetNameRef());
1737 0 : return OGRERR_FAILURE;
1738 : }
1739 : }
1740 :
1741 : // Write FID, if FID column present
1742 2851 : int nArrowIdx = 0;
1743 2851 : if (!m_osFIDColumn.empty())
1744 : {
1745 2257 : int64_t nFID = poFeature->GetFID();
1746 2257 : if (nFID == OGRNullFID)
1747 : {
1748 36 : nFID = m_nFeatureCount;
1749 36 : poFeature->SetFID(nFID);
1750 : }
1751 : auto poBuilder =
1752 2257 : static_cast<arrow::Int64Builder *>(m_apoBuilders[0].get());
1753 2257 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->Append(nFID));
1754 2257 : nArrowIdx++;
1755 : }
1756 :
1757 : // Write attributes
1758 7752 : for (int i = 0; i < nFieldCount; ++i, ++nArrowIdx)
1759 : {
1760 4901 : auto poBuilder = m_apoBuilders[nArrowIdx].get();
1761 4901 : if (!poFeature->IsFieldSetAndNotNullUnsafe(i))
1762 : {
1763 714 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1764 714 : continue;
1765 : }
1766 :
1767 4187 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
1768 4187 : const auto eSubDT = poFieldDefn->GetSubType();
1769 4187 : switch (poFieldDefn->GetType())
1770 : {
1771 2453 : case OFTInteger:
1772 2453 : if (eSubDT == OFSTBoolean)
1773 16 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1774 : static_cast<arrow::BooleanBuilder *>(poBuilder)->Append(
1775 : poFeature->GetFieldAsIntegerUnsafe(i) != 0));
1776 2437 : else if (eSubDT == OFSTInt16)
1777 16 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1778 : static_cast<arrow::Int16Builder *>(poBuilder)->Append(
1779 : static_cast<int16_t>(
1780 : poFeature->GetFieldAsIntegerUnsafe(i))));
1781 : else
1782 2421 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1783 : static_cast<arrow::Int32Builder *>(poBuilder)->Append(
1784 : poFeature->GetFieldAsIntegerUnsafe(i)));
1785 2453 : break;
1786 :
1787 128 : case OFTInteger64:
1788 128 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1789 : static_cast<arrow::Int64Builder *>(poBuilder)->Append(
1790 : static_cast<int64_t>(
1791 : poFeature->GetFieldAsInteger64Unsafe(i))));
1792 128 : break;
1793 :
1794 201 : case OFTReal:
1795 : {
1796 201 : const auto arrowType = m_poSchema->fields()[nArrowIdx]->type();
1797 201 : const double dfVal = poFeature->GetFieldAsDoubleUnsafe(i);
1798 201 : if (arrowType->id() == arrow::Type::DECIMAL128)
1799 : {
1800 : auto res = arrow::Decimal128::FromReal(
1801 : dfVal, poFieldDefn->GetWidth(),
1802 32 : poFieldDefn->GetPrecision());
1803 32 : if (res.ok())
1804 : {
1805 32 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1806 : static_cast<arrow::Decimal128Builder *>(poBuilder)
1807 : ->Append(*res));
1808 : }
1809 : else
1810 : {
1811 0 : CPLError(CE_Warning, CPLE_AppDefined,
1812 : "Cannot parse %.18g as a %d.%d decimal", dfVal,
1813 : poFieldDefn->GetWidth(),
1814 : poFieldDefn->GetPrecision());
1815 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1816 : }
1817 : }
1818 169 : else if (arrowType->id() == arrow::Type::DECIMAL256)
1819 : {
1820 : auto res = arrow::Decimal256::FromReal(
1821 : dfVal, poFieldDefn->GetWidth(),
1822 0 : poFieldDefn->GetPrecision());
1823 0 : if (res.ok())
1824 : {
1825 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1826 : static_cast<arrow::Decimal256Builder *>(poBuilder)
1827 : ->Append(*res));
1828 : }
1829 : else
1830 : {
1831 0 : CPLError(CE_Warning, CPLE_AppDefined,
1832 : "Cannot parse %.18g as a %d.%d decimal", dfVal,
1833 : poFieldDefn->GetWidth(),
1834 : poFieldDefn->GetPrecision());
1835 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1836 : }
1837 : }
1838 169 : else if (eSubDT == OFSTFloat32)
1839 : {
1840 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1841 : static_cast<arrow::FloatBuilder *>(poBuilder)->Append(
1842 : static_cast<float>(dfVal)));
1843 : }
1844 : else
1845 : {
1846 141 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1847 : static_cast<arrow::DoubleBuilder *>(poBuilder)->Append(
1848 : dfVal));
1849 : }
1850 201 : break;
1851 : }
1852 :
1853 497 : case OFTString:
1854 : case OFTWideString:
1855 497 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1856 : static_cast<arrow::StringBuilder *>(poBuilder)->Append(
1857 : poFeature->GetFieldAsStringUnsafe(i)));
1858 497 : break;
1859 :
1860 62 : case OFTBinary:
1861 : {
1862 62 : int nSize = 0;
1863 62 : const auto pData = poFeature->GetFieldAsBinary(i, &nSize);
1864 62 : if (poFieldDefn->GetWidth() != 0)
1865 : {
1866 20 : if (poFieldDefn->GetWidth() != nSize)
1867 : {
1868 0 : CPLError(
1869 : CE_Warning, CPLE_AppDefined,
1870 : "Cannot write field %s. Got %d bytes, expected %d",
1871 : poFieldDefn->GetNameRef(), nSize,
1872 : poFieldDefn->GetWidth());
1873 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1874 : }
1875 : else
1876 : {
1877 20 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1878 : static_cast<arrow::FixedSizeBinaryBuilder *>(
1879 : poBuilder)
1880 : ->Append(pData));
1881 : }
1882 : }
1883 : else
1884 42 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1885 : static_cast<arrow::BinaryBuilder *>(poBuilder)->Append(
1886 : pData, nSize));
1887 62 : break;
1888 : }
1889 :
1890 216 : case OFTIntegerList:
1891 : {
1892 216 : auto poListBuilder =
1893 : static_cast<arrow::ListBuilder *>(poBuilder);
1894 216 : if (eSubDT == OFSTBoolean)
1895 : {
1896 36 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1897 : auto poValueBuilder = static_cast<arrow::BooleanBuilder *>(
1898 36 : poListBuilder->value_builder());
1899 36 : int nValues = 0;
1900 : const auto panValues =
1901 36 : poFeature->GetFieldAsIntegerList(i, &nValues);
1902 108 : for (int j = 0; j < nValues; ++j)
1903 72 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1904 : poValueBuilder->Append(panValues[j] != 0));
1905 : }
1906 180 : else if (eSubDT == OFSTInt16)
1907 : {
1908 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1909 : auto poValueBuilder = static_cast<arrow::Int16Builder *>(
1910 0 : poListBuilder->value_builder());
1911 0 : int nValues = 0;
1912 : const auto panValues =
1913 0 : poFeature->GetFieldAsIntegerList(i, &nValues);
1914 0 : for (int j = 0; j < nValues; ++j)
1915 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1916 : static_cast<int16_t>(panValues[j])));
1917 : }
1918 : else
1919 : {
1920 180 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1921 : auto poValueBuilder = static_cast<arrow::Int32Builder *>(
1922 180 : poListBuilder->value_builder());
1923 180 : int nValues = 0;
1924 : const auto panValues =
1925 180 : poFeature->GetFieldAsIntegerList(i, &nValues);
1926 540 : for (int j = 0; j < nValues; ++j)
1927 360 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1928 : poValueBuilder->Append(panValues[j]));
1929 : }
1930 216 : break;
1931 : }
1932 :
1933 92 : case OFTInteger64List:
1934 : {
1935 92 : auto poListBuilder =
1936 : static_cast<arrow::ListBuilder *>(poBuilder);
1937 92 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1938 : auto poValueBuilder = static_cast<arrow::Int64Builder *>(
1939 92 : poListBuilder->value_builder());
1940 92 : int nValues = 0;
1941 : const auto panValues =
1942 92 : poFeature->GetFieldAsInteger64List(i, &nValues);
1943 292 : for (int j = 0; j < nValues; ++j)
1944 200 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1945 : static_cast<int64_t>(panValues[j])));
1946 92 : break;
1947 : }
1948 :
1949 152 : case OFTRealList:
1950 : {
1951 152 : auto poListBuilder =
1952 : static_cast<arrow::ListBuilder *>(poBuilder);
1953 152 : if (eSubDT == OFSTFloat32)
1954 : {
1955 48 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1956 : auto poValueBuilder = static_cast<arrow::FloatBuilder *>(
1957 48 : poListBuilder->value_builder());
1958 48 : int nValues = 0;
1959 : const auto padfValues =
1960 48 : poFeature->GetFieldAsDoubleList(i, &nValues);
1961 144 : for (int j = 0; j < nValues; ++j)
1962 96 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1963 : static_cast<float>(padfValues[j])));
1964 : }
1965 : else
1966 : {
1967 104 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1968 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1969 104 : poListBuilder->value_builder());
1970 104 : int nValues = 0;
1971 : const auto padfValues =
1972 104 : poFeature->GetFieldAsDoubleList(i, &nValues);
1973 280 : for (int j = 0; j < nValues; ++j)
1974 176 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1975 : poValueBuilder->Append(padfValues[j]));
1976 : }
1977 152 : break;
1978 : }
1979 :
1980 52 : case OFTStringList:
1981 : case OFTWideStringList:
1982 : {
1983 52 : auto poListBuilder =
1984 : static_cast<arrow::ListBuilder *>(poBuilder);
1985 52 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1986 : auto poValueBuilder = static_cast<arrow::StringBuilder *>(
1987 52 : poListBuilder->value_builder());
1988 52 : const auto papszValues = poFeature->GetFieldAsStringList(i);
1989 132 : for (int j = 0; papszValues && papszValues[j]; ++j)
1990 80 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1991 : poValueBuilder->Append(papszValues[j]));
1992 52 : break;
1993 : }
1994 :
1995 109 : case OFTDate:
1996 : {
1997 : int nYear, nMonth, nDay, nHour, nMinute;
1998 : float fSec;
1999 : int nTZFlag;
2000 109 : poFeature->GetFieldAsDateTime(i, &nYear, &nMonth, &nDay, &nHour,
2001 : &nMinute, &fSec, &nTZFlag);
2002 : struct tm brokenDown;
2003 109 : memset(&brokenDown, 0, sizeof(brokenDown));
2004 109 : brokenDown.tm_year = nYear - 1900;
2005 109 : brokenDown.tm_mon = nMonth - 1;
2006 109 : brokenDown.tm_mday = nDay;
2007 109 : GIntBig nVal = CPLYMDHMSToUnixTime(&brokenDown);
2008 109 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2009 : static_cast<arrow::Date32Builder *>(poBuilder)->Append(
2010 : static_cast<int>(nVal / 86400)));
2011 109 : break;
2012 : }
2013 :
2014 36 : case OFTTime:
2015 : {
2016 : int nYear, nMonth, nDay, nHour, nMinute;
2017 : float fSec;
2018 : int nTZFlag;
2019 36 : poFeature->GetFieldAsDateTime(i, &nYear, &nMonth, &nDay, &nHour,
2020 : &nMinute, &fSec, &nTZFlag);
2021 36 : int nVal = nHour * 3600 + nMinute * 60;
2022 36 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2023 : static_cast<arrow::Time32Builder *>(poBuilder)->Append(
2024 : static_cast<int>(
2025 : (static_cast<double>(nVal) + fSec) * 1000 + 0.5)));
2026 36 : break;
2027 : }
2028 :
2029 189 : case OFTDateTime:
2030 : {
2031 : int nYear, nMonth, nDay, nHour, nMinute;
2032 : float fSec;
2033 : int nTZFlag;
2034 189 : poFeature->GetFieldAsDateTime(i, &nYear, &nMonth, &nDay, &nHour,
2035 : &nMinute, &fSec, &nTZFlag);
2036 : struct tm brokenDown;
2037 189 : memset(&brokenDown, 0, sizeof(brokenDown));
2038 189 : brokenDown.tm_year = nYear - 1900;
2039 189 : brokenDown.tm_mon = nMonth - 1;
2040 189 : brokenDown.tm_mday = nDay;
2041 189 : brokenDown.tm_hour = nHour;
2042 189 : brokenDown.tm_min = nMinute;
2043 189 : brokenDown.tm_sec = 0;
2044 189 : GIntBig nVal = CPLYMDHMSToUnixTime(&brokenDown);
2045 306 : if (!IsFileWriterCreated() &&
2046 117 : m_anTZFlag[i] != OGR_TZFLAG_UNKNOWN)
2047 : {
2048 59 : if (m_anTZFlag[i] == TZFLAG_UNINITIALIZED)
2049 35 : m_anTZFlag[i] = nTZFlag;
2050 24 : else if (m_anTZFlag[i] != nTZFlag)
2051 : {
2052 0 : if (m_anTZFlag[i] >= OGR_TZFLAG_MIXED_TZ &&
2053 0 : nTZFlag >= OGR_TZFLAG_MIXED_TZ)
2054 : {
2055 0 : m_anTZFlag[i] =
2056 : OGR_TZFLAG_MIXED_TZ; // harmonize on UTC ultimately
2057 : }
2058 : else
2059 : {
2060 0 : CPLError(CE_Warning, CPLE_AppDefined,
2061 : "Field %s contains a mix of "
2062 : "timezone-aware and local/without "
2063 : "timezone values.",
2064 : poFieldDefn->GetNameRef());
2065 0 : m_anTZFlag[i] = OGR_TZFLAG_UNKNOWN;
2066 : }
2067 : }
2068 : }
2069 189 : if (nTZFlag > OGR_TZFLAG_MIXED_TZ)
2070 : {
2071 60 : const int nOffsetSec = (nTZFlag - OGR_TZFLAG_UTC) * 15 * 60;
2072 60 : nVal -= nOffsetSec;
2073 : }
2074 189 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2075 : static_cast<arrow::TimestampBuilder *>(poBuilder)->Append(
2076 : static_cast<int64_t>(
2077 : (static_cast<double>(nVal) + fSec) * 1000 + 0.5)));
2078 189 : break;
2079 : }
2080 : }
2081 : }
2082 :
2083 : // Write geometries
2084 5814 : for (int i = 0; i < nGeomFieldCount; ++i, ++nArrowIdx)
2085 : {
2086 2963 : auto poBuilder = m_apoBuilders[nArrowIdx].get();
2087 2963 : OGRGeometry *poGeom = poFeature->GetGeomFieldRef(i);
2088 2963 : if (BuildGeometry(poGeom, i, poBuilder) != OGRERR_NONE)
2089 0 : return OGRERR_FAILURE;
2090 : }
2091 :
2092 2851 : m_nFeatureCount++;
2093 :
2094 : // Flush the current row group if reaching the limit of rows per group.
2095 2851 : if (!m_apoBuilders.empty() && m_apoBuilders[0]->length() == m_nRowGroupSize)
2096 : {
2097 22 : if (!FlushFeatures())
2098 0 : return OGRERR_FAILURE;
2099 : }
2100 :
2101 2851 : return OGRERR_NONE;
2102 : }
2103 :
2104 : /************************************************************************/
2105 : /* FlushFeatures() */
2106 : /************************************************************************/
2107 :
2108 38 : inline bool OGRArrowWriterLayer::FlushFeatures()
2109 : {
2110 38 : if (m_apoBuilders.empty() || m_apoBuilders[0]->length() == 0)
2111 0 : return true;
2112 :
2113 38 : if (!IsFileWriterCreated())
2114 : {
2115 8 : CreateWriter();
2116 8 : if (!IsFileWriterCreated())
2117 0 : return false;
2118 : }
2119 :
2120 38 : return FlushGroup();
2121 : }
2122 :
2123 : /************************************************************************/
2124 : /* GetFeatureCount() */
2125 : /************************************************************************/
2126 :
2127 1 : inline GIntBig OGRArrowWriterLayer::GetFeatureCount(int bForce)
2128 : {
2129 1 : if (m_poAttrQuery == nullptr && m_poFilterGeom == nullptr)
2130 : {
2131 1 : return m_nFeatureCount;
2132 : }
2133 0 : return OGRLayer::GetFeatureCount(bForce);
2134 : }
2135 :
2136 : /************************************************************************/
2137 : /* TestCapability() */
2138 : /************************************************************************/
2139 :
2140 501 : inline int OGRArrowWriterLayer::TestCapability(const char *pszCap)
2141 : {
2142 501 : if (EQUAL(pszCap, OLCCreateField) || EQUAL(pszCap, OLCCreateGeomField))
2143 27 : return m_poSchema == nullptr;
2144 :
2145 474 : if (EQUAL(pszCap, OLCSequentialWrite))
2146 24 : return true;
2147 :
2148 450 : if (EQUAL(pszCap, OLCFastWriteArrowBatch))
2149 0 : return true;
2150 :
2151 450 : if (EQUAL(pszCap, OLCStringsAsUTF8))
2152 1 : return true;
2153 :
2154 449 : if (EQUAL(pszCap, OLCMeasuredGeometries))
2155 166 : return true;
2156 :
2157 283 : return false;
2158 : }
2159 :
2160 : /************************************************************************/
2161 : /* WriteArrays() */
2162 : /************************************************************************/
2163 :
2164 205 : inline bool OGRArrowWriterLayer::WriteArrays(
2165 : std::function<bool(const std::shared_ptr<arrow::Field> &,
2166 : const std::shared_ptr<arrow::Array> &)>
2167 : postProcessArray)
2168 : {
2169 205 : int nArrowIdx = 0;
2170 205 : int nArrowIdxFirstField = !m_osFIDColumn.empty() ? 1 : 0;
2171 1741 : for (const auto &poBuilder : m_apoBuilders)
2172 : {
2173 1536 : const auto &field = m_poSchema->fields()[nArrowIdx];
2174 :
2175 0 : std::shared_ptr<arrow::Array> array;
2176 1536 : auto status = poBuilder->Finish(&array);
2177 1536 : if (!status.ok())
2178 : {
2179 0 : CPLError(CE_Failure, CPLE_AppDefined,
2180 : "builder::Finish() for field %s failed with %s",
2181 0 : field->name().c_str(), status.message().c_str());
2182 0 : return false;
2183 : }
2184 :
2185 : // CPLDebug("ARROW", "%s", array->ToString().c_str());
2186 :
2187 1536 : const int iCol = nArrowIdx - nArrowIdxFirstField;
2188 1536 : if (iCol >= 0 && iCol < m_poFeatureDefn->GetFieldCount())
2189 : {
2190 1284 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(iCol);
2191 1284 : const auto eFieldType = poFieldDefn->GetType();
2192 1284 : if (eFieldType == OFTInteger || eFieldType == OFTInteger64)
2193 : {
2194 219 : const auto &osDomainName = poFieldDefn->GetDomainName();
2195 : const auto oIter =
2196 219 : m_oMapFieldDomainToStringArray.find(osDomainName);
2197 219 : if (oIter != m_oMapFieldDomainToStringArray.end())
2198 : {
2199 : auto result = arrow::DictionaryArray::FromArrays(
2200 12 : field->type(), array, oIter->second);
2201 12 : if (!result.ok())
2202 : {
2203 0 : CPLError(CE_Failure, CPLE_AppDefined,
2204 : "DictionaryArray::FromArrays() for field %s "
2205 : "failed with %s",
2206 0 : field->name().c_str(),
2207 0 : result.status().message().c_str());
2208 0 : return false;
2209 : }
2210 12 : array = *result;
2211 : }
2212 : }
2213 : }
2214 :
2215 1536 : if (!postProcessArray(field, array))
2216 : {
2217 0 : return false;
2218 : }
2219 :
2220 1536 : nArrowIdx++;
2221 : }
2222 :
2223 205 : if (m_bWriteBBoxStruct)
2224 : {
2225 144 : const int nGeomFieldCount = m_poFeatureDefn->GetGeomFieldCount();
2226 292 : for (int i = 0; i < nGeomFieldCount; ++i)
2227 : {
2228 148 : const auto &field = m_apoFieldsBBOX[i];
2229 0 : std::shared_ptr<arrow::Array> array;
2230 148 : auto status = m_apoBuildersBBOXStruct[i]->Finish(&array);
2231 148 : if (!status.ok())
2232 : {
2233 0 : CPLError(CE_Failure, CPLE_AppDefined,
2234 : "builder::Finish() for field %s failed with %s",
2235 0 : field->name().c_str(), status.message().c_str());
2236 0 : return false;
2237 : }
2238 :
2239 148 : if (!postProcessArray(field, array))
2240 : {
2241 0 : return false;
2242 : }
2243 : }
2244 : }
2245 :
2246 205 : return true;
2247 : }
2248 :
2249 : /************************************************************************/
2250 : /* TestBit() */
2251 : /************************************************************************/
2252 :
/**
 * Test a bit in a packed bitmap, least significant bit first in each byte.
 *
 * @param pabyData Bitmap buffer. Must hold at least nIdx / 8 + 1 bytes.
 * @param nIdx     Zero-based index of the bit to test.
 * @return true if the bit is set.
 */
static inline bool TestBit(const uint8_t *pabyData, size_t nIdx)
{
    const unsigned nByte = pabyData[nIdx >> 3];  // byte holding the bit
    const unsigned nMask = 1U << (nIdx & 7);     // position inside the byte
    return (nByte & nMask) != 0;
}
2257 :
2258 : /************************************************************************/
2259 : /* WriteArrowBatchInternal() */
2260 : /************************************************************************/
2261 :
2262 122 : inline bool OGRArrowWriterLayer::WriteArrowBatchInternal(
2263 : const struct ArrowSchema *schema, struct ArrowArray *array,
2264 : CSLConstList papszOptions,
2265 : std::function<bool(const std::shared_ptr<arrow::RecordBatch> &)> writeBatch)
2266 : {
2267 122 : if (m_poSchema == nullptr)
2268 : {
2269 119 : CreateSchema();
2270 : }
2271 :
2272 122 : if (!IsFileWriterCreated())
2273 : {
2274 119 : CreateWriter();
2275 119 : if (!IsFileWriterCreated())
2276 0 : return false;
2277 : }
2278 :
2279 122 : if (m_apoBuilders.empty())
2280 : {
2281 119 : CreateArrayBuilders();
2282 : }
2283 :
2284 122 : const int nGeomFieldCount = m_poFeatureDefn->GetGeomFieldCount();
2285 122 : const int nGeomFieldCountBBoxFields =
2286 122 : m_bWriteBBoxStruct ? nGeomFieldCount : 0;
2287 :
2288 122 : const char *pszFIDName = CSLFetchNameValueDef(
2289 : papszOptions, "FID", OGRLayer::DEFAULT_ARROW_FID_NAME);
2290 : const char *pszSingleGeomFieldName =
2291 122 : CSLFetchNameValue(papszOptions, "GEOMETRY_NAME");
2292 :
2293 : // Sort schema and array children in the same order as m_poSchema.
2294 : // This is needed for non-WKB geometry encoding
2295 244 : std::map<std::string, int> oMapSchemaChildrenNameToIdx;
2296 1324 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
2297 : {
2298 1202 : if (oMapSchemaChildrenNameToIdx.find(schema->children[i]->name) !=
2299 2404 : oMapSchemaChildrenNameToIdx.end())
2300 : {
2301 0 : CPLError(CE_Failure, CPLE_AppDefined,
2302 : "Several fields with same name '%s' found",
2303 0 : schema->children[i]->name);
2304 0 : return false;
2305 : }
2306 1202 : oMapSchemaChildrenNameToIdx[schema->children[i]->name] = i;
2307 :
2308 1202 : if (!pszSingleGeomFieldName && schema->children[i]->metadata)
2309 : {
2310 : const auto oMetadata =
2311 250 : OGRParseArrowMetadata(schema->children[i]->metadata);
2312 125 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
2313 255 : if (oIter != oMetadata.end() &&
2314 130 : (oIter->second == EXTENSION_NAME_OGC_WKB ||
2315 8 : oIter->second == EXTENSION_NAME_GEOARROW_WKB))
2316 : {
2317 122 : pszSingleGeomFieldName = schema->children[i]->name;
2318 : }
2319 : }
2320 : }
2321 122 : if (!pszSingleGeomFieldName)
2322 0 : pszSingleGeomFieldName = OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME;
2323 :
2324 122 : std::vector<int> anMapLayerSchemaToArraySchema(m_poSchema->num_fields(),
2325 366 : -1);
2326 : struct ArrowArray fidArray;
2327 : struct ArrowSchema fidSchema;
2328 122 : memset(&fidArray, 0, sizeof(fidArray));
2329 122 : memset(&fidSchema, 0, sizeof(fidSchema));
2330 244 : std::vector<void *> apBuffersFid;
2331 244 : std::vector<int64_t> fids;
2332 :
2333 244 : std::set<int> oSetReferencedFieldsInArraySchema;
2334 0 : const auto DummyFreeArray = [](struct ArrowArray *ptrArray)
2335 0 : { ptrArray->release = nullptr; };
2336 122 : const auto DummyFreeSchema = [](struct ArrowSchema *ptrSchema)
2337 122 : { ptrSchema->release = nullptr; };
2338 122 : bool bRebuildBatch = false;
2339 1323 : for (int i = 0; i < m_poSchema->num_fields() - nGeomFieldCountBBoxFields;
2340 : ++i)
2341 : {
2342 : auto oIter =
2343 1201 : oMapSchemaChildrenNameToIdx.find(m_poSchema->field(i)->name());
2344 1201 : if (oIter == oMapSchemaChildrenNameToIdx.end())
2345 : {
2346 6 : if (m_poSchema->field(i)->name() == m_osFIDColumn)
2347 : {
2348 2 : oIter = oMapSchemaChildrenNameToIdx.find(pszFIDName);
2349 2 : if (oIter == oMapSchemaChildrenNameToIdx.end())
2350 : {
2351 : // If the input data does not contain a FID column, but
2352 : // the output file requires it, creates a default FID column
2353 1 : fidArray.release = DummyFreeArray;
2354 1 : fidArray.n_buffers = 2;
2355 1 : apBuffersFid.resize(2);
2356 1 : fidArray.buffers =
2357 1 : const_cast<const void **>(apBuffersFid.data());
2358 1 : fids.reserve(static_cast<size_t>(array->length));
2359 1 : for (size_t iRow = 0;
2360 11 : iRow < static_cast<size_t>(array->length); ++iRow)
2361 10 : fids.push_back(m_nFeatureCount + iRow);
2362 1 : fidArray.buffers[1] = fids.data();
2363 1 : fidArray.length = array->length;
2364 1 : fidSchema.release = DummyFreeSchema;
2365 1 : fidSchema.name = m_osFIDColumn.c_str();
2366 1 : fidSchema.format = "l"; // int64
2367 1 : continue;
2368 : }
2369 : }
2370 8 : else if (nGeomFieldCount == 1 &&
2371 4 : m_poFeatureDefn->GetGeomFieldIndex(
2372 4 : m_poSchema->field(i)->name().c_str()) == 0)
2373 : {
2374 : oIter =
2375 4 : oMapSchemaChildrenNameToIdx.find(pszSingleGeomFieldName);
2376 4 : if (oIter != oMapSchemaChildrenNameToIdx.end())
2377 4 : bRebuildBatch = true;
2378 : }
2379 :
2380 5 : if (oIter == oMapSchemaChildrenNameToIdx.end())
2381 : {
2382 0 : CPLError(CE_Failure, CPLE_AppDefined,
2383 : "Cannot find field '%s' in schema",
2384 0 : m_poSchema->field(i)->name().c_str());
2385 0 : return false;
2386 : }
2387 : }
2388 1200 : anMapLayerSchemaToArraySchema[i] = oIter->second;
2389 1200 : oSetReferencedFieldsInArraySchema.insert(oIter->second);
2390 : }
2391 :
2392 : // Note: we cheat a bit by declaring a single instance of the minx/miny/
2393 : // maxx/maxy sub-field ArrowSchema*, and make all struct ArrowSchema point
2394 : // to them. That's OK because we use DummyFreeSchema to release, which does
2395 : // nothing.
2396 : struct ArrowSchema bboxStructSchemaXMin;
2397 : struct ArrowSchema bboxStructSchemaYMin;
2398 : struct ArrowSchema bboxStructSchemaXMax;
2399 : struct ArrowSchema bboxStructSchemaYMax;
2400 122 : constexpr int BBOX_SUBFIELD_COUNT = 4;
2401 : std::array<struct ArrowSchema *, BBOX_SUBFIELD_COUNT>
2402 : bboxStructSchemaChildren;
2403 122 : constexpr int BBOX_STRUCT_BUFFER_COUNT = 1; // validity bitmap array
2404 : // cppcheck-suppress constStatement
2405 : std::vector<std::array<const void *, BBOX_STRUCT_BUFFER_COUNT>>
2406 244 : bboxStructBuffersPtr;
2407 244 : std::vector<std::vector<GByte>> aabyBboxStructValidity;
2408 244 : std::vector<std::vector<float>> aadfMinX, aadfMinY, aadfMaxX, aadfMaxY;
2409 : // cppcheck-suppress constStatement
2410 244 : std::vector<std::array<struct ArrowArray, BBOX_SUBFIELD_COUNT>> bboxArrays;
2411 : // cppcheck-suppress constStatement
2412 : std::vector<std::array<struct ArrowArray *, BBOX_SUBFIELD_COUNT>>
2413 244 : bboxArraysPtr;
2414 122 : constexpr int BBOX_SUBFIELD_BUFFER_COUNT =
2415 : 2; // validity bitmap array and float array
2416 : std::vector<std::array<std::array<const void *, BBOX_SUBFIELD_BUFFER_COUNT>,
2417 : BBOX_SUBFIELD_COUNT>>
2418 244 : bboxBuffersPtr;
2419 :
2420 : // Temporary arrays to hold the geometry bounding boxes.
2421 244 : std::vector<struct ArrowArray> bboxStructArray;
2422 244 : std::vector<struct ArrowSchema> bboxStructSchema;
2423 :
2424 244 : std::vector<struct ArrowSchema *> newSchemaChildren;
2425 244 : std::vector<struct ArrowArray *> newArrayChildren;
2426 122 : newSchemaChildren.reserve(m_poSchema->num_fields());
2427 122 : newArrayChildren.reserve(m_poSchema->num_fields());
2428 1323 : for (int i = 0; i < m_poSchema->num_fields() - nGeomFieldCountBBoxFields;
2429 : ++i)
2430 : {
2431 1201 : if (anMapLayerSchemaToArraySchema[i] < 0)
2432 : {
2433 1 : CPLAssert(m_poSchema->field(i)->name() == m_osFIDColumn);
2434 1 : newSchemaChildren.emplace_back(&fidSchema);
2435 1 : newArrayChildren.emplace_back(&fidArray);
2436 : }
2437 : else
2438 : {
2439 : newSchemaChildren.emplace_back(
2440 1200 : schema->children[anMapLayerSchemaToArraySchema[i]]);
2441 : newArrayChildren.emplace_back(
2442 1200 : array->children[anMapLayerSchemaToArraySchema[i]]);
2443 : }
2444 : }
2445 :
2446 122 : if (m_bWriteBBoxStruct)
2447 : {
2448 9 : memset(&bboxStructSchemaXMin, 0, sizeof(bboxStructSchemaXMin));
2449 9 : memset(&bboxStructSchemaYMin, 0, sizeof(bboxStructSchemaYMin));
2450 9 : memset(&bboxStructSchemaXMax, 0, sizeof(bboxStructSchemaXMax));
2451 9 : memset(&bboxStructSchemaYMax, 0, sizeof(bboxStructSchemaYMax));
2452 :
2453 9 : bboxStructSchemaXMin.release = DummyFreeSchema;
2454 9 : bboxStructSchemaXMin.name = "xmin";
2455 9 : bboxStructSchemaXMin.format = "f"; // float32
2456 :
2457 9 : bboxStructSchemaYMin.release = DummyFreeSchema;
2458 9 : bboxStructSchemaYMin.name = "ymin";
2459 9 : bboxStructSchemaYMin.format = "f"; // float32
2460 :
2461 9 : bboxStructSchemaXMax.release = DummyFreeSchema;
2462 9 : bboxStructSchemaXMax.name = "xmax";
2463 9 : bboxStructSchemaXMax.format = "f"; // float32
2464 :
2465 9 : bboxStructSchemaYMax.release = DummyFreeSchema;
2466 9 : bboxStructSchemaYMax.name = "ymax";
2467 9 : bboxStructSchemaYMax.format = "f"; // float32
2468 :
2469 : try
2470 : {
2471 9 : constexpr int XMIN_IDX = 0;
2472 9 : constexpr int YMIN_IDX = 1;
2473 9 : constexpr int XMAX_IDX = 2;
2474 9 : constexpr int YMAX_IDX = 3;
2475 9 : bboxStructSchemaChildren[XMIN_IDX] = &bboxStructSchemaXMin;
2476 : // cppcheck-suppress objectIndex
2477 9 : bboxStructSchemaChildren[YMIN_IDX] = &bboxStructSchemaYMin;
2478 : // cppcheck-suppress objectIndex
2479 9 : bboxStructSchemaChildren[XMAX_IDX] = &bboxStructSchemaXMax;
2480 : // cppcheck-suppress objectIndex
2481 9 : bboxStructSchemaChildren[YMAX_IDX] = &bboxStructSchemaYMax;
2482 :
2483 9 : bboxStructArray.resize(nGeomFieldCount);
2484 9 : bboxStructSchema.resize(nGeomFieldCount);
2485 9 : bboxArrays.resize(nGeomFieldCount);
2486 9 : bboxArraysPtr.resize(nGeomFieldCount);
2487 9 : bboxBuffersPtr.resize(nGeomFieldCount);
2488 9 : bboxStructBuffersPtr.resize(nGeomFieldCount);
2489 9 : aabyBboxStructValidity.resize(nGeomFieldCount);
2490 18 : memset(bboxStructArray.data(), 0,
2491 9 : nGeomFieldCount * sizeof(bboxStructArray[0]));
2492 18 : memset(bboxStructSchema.data(), 0,
2493 9 : nGeomFieldCount * sizeof(bboxStructSchema[0]));
2494 18 : memset(bboxArrays.data(), 0,
2495 9 : nGeomFieldCount * sizeof(bboxArrays[0]));
2496 9 : aadfMinX.resize(nGeomFieldCount);
2497 9 : aadfMinY.resize(nGeomFieldCount);
2498 9 : aadfMaxX.resize(nGeomFieldCount);
2499 9 : aadfMaxY.resize(nGeomFieldCount);
2500 18 : for (int i = 0; i < nGeomFieldCount; ++i)
2501 : {
2502 9 : const bool bIsNullable = CPL_TO_BOOL(
2503 9 : m_poFeatureDefn->GetGeomFieldDefn(i)->IsNullable());
2504 9 : aadfMinX[i].reserve(static_cast<size_t>(array->length));
2505 9 : aadfMinY[i].reserve(static_cast<size_t>(array->length));
2506 9 : aadfMaxX[i].reserve(static_cast<size_t>(array->length));
2507 9 : aadfMaxY[i].reserve(static_cast<size_t>(array->length));
2508 9 : aabyBboxStructValidity[i].resize(
2509 9 : static_cast<size_t>(array->length + 7) / 8, 0xFF);
2510 :
2511 9 : bboxStructSchema[i].release = DummyFreeSchema;
2512 9 : bboxStructSchema[i].name = m_apoFieldsBBOX[i]->name().c_str();
2513 9 : bboxStructSchema[i].format = "+s"; // structure
2514 9 : bboxStructSchema[i].flags =
2515 9 : bIsNullable ? ARROW_FLAG_NULLABLE : 0;
2516 9 : bboxStructSchema[i].n_children = BBOX_SUBFIELD_COUNT;
2517 9 : bboxStructSchema[i].children = bboxStructSchemaChildren.data();
2518 :
2519 9 : constexpr int VALIDITY_ARRAY_IDX = 0;
2520 9 : constexpr int BBOX_SUBFIELD_FLOAT_VALUE_IDX = 1;
2521 9 : bboxBuffersPtr[i][XMIN_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2522 9 : aadfMinX[i].data();
2523 9 : bboxBuffersPtr[i][YMIN_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2524 9 : aadfMinY[i].data();
2525 9 : bboxBuffersPtr[i][XMAX_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2526 9 : aadfMaxX[i].data();
2527 9 : bboxBuffersPtr[i][YMAX_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2528 9 : aadfMaxY[i].data();
2529 :
2530 45 : for (int j = 0; j < BBOX_SUBFIELD_COUNT; ++j)
2531 : {
2532 36 : bboxBuffersPtr[i][j][VALIDITY_ARRAY_IDX] = nullptr;
2533 :
2534 36 : bboxArrays[i][j].release = DummyFreeArray;
2535 36 : bboxArrays[i][j].length = array->length;
2536 36 : bboxArrays[i][j].n_buffers = BBOX_SUBFIELD_BUFFER_COUNT;
2537 36 : bboxArrays[i][j].buffers = bboxBuffersPtr[i][j].data();
2538 :
2539 36 : bboxArraysPtr[i][j] = &bboxArrays[i][j];
2540 : }
2541 :
2542 9 : bboxStructArray[i].release = DummyFreeArray;
2543 9 : bboxStructArray[i].n_children = BBOX_SUBFIELD_COUNT;
2544 : // coverity[escape]
2545 9 : bboxStructArray[i].children = bboxArraysPtr[i].data();
2546 9 : bboxStructArray[i].length = array->length;
2547 9 : bboxStructArray[i].n_buffers = BBOX_STRUCT_BUFFER_COUNT;
2548 9 : bboxStructBuffersPtr[i][VALIDITY_ARRAY_IDX] =
2549 9 : bIsNullable ? aabyBboxStructValidity[i].data() : nullptr;
2550 : // coverity[escape]
2551 9 : bboxStructArray[i].buffers = bboxStructBuffersPtr[i].data();
2552 :
2553 9 : newSchemaChildren.emplace_back(&bboxStructSchema[i]);
2554 9 : newArrayChildren.emplace_back(&bboxStructArray[i]);
2555 : }
2556 : }
2557 0 : catch (const std::bad_alloc &)
2558 : {
2559 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
2560 : "Out of memory in "
2561 : "OGRArrowWriterLayer::WriteArrowBatchInternal()");
2562 0 : return false;
2563 : }
2564 : }
2565 :
2566 1324 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
2567 : {
2568 1202 : if (oSetReferencedFieldsInArraySchema.find(i) ==
2569 2404 : oSetReferencedFieldsInArraySchema.end())
2570 : {
2571 4 : if (m_osFIDColumn.empty() &&
2572 2 : strcmp(schema->children[i]->name, pszFIDName) == 0)
2573 : {
2574 : // If the input data contains a FID column, but the output data
2575 : // does not, then ignore it.
2576 : }
2577 : else
2578 : {
2579 0 : CPLError(CE_Failure, CPLE_AppDefined,
2580 : "Found field '%s' in array schema that does not exist "
2581 : "in layer schema",
2582 0 : schema->children[i]->name);
2583 0 : return false;
2584 : }
2585 : }
2586 : }
2587 :
2588 : // ImportSchema() would release the schema, but we don't want that
2589 : // So copy the structure content into a local variable, and override its
2590 : // release callback to a no-op. This may be a bit fragile, but it doesn't
2591 : // look like ImportSchema implementation tries to access the C ArrowSchema
2592 : // after it has been called.
2593 122 : struct ArrowSchema lSchema = *schema;
2594 122 : schema = &lSchema;
2595 122 : CPL_IGNORE_RET_VAL(schema);
2596 :
2597 122 : lSchema.n_children = newSchemaChildren.size();
2598 122 : lSchema.children = newSchemaChildren.data();
2599 :
2600 122 : lSchema.release = DummyFreeSchema;
2601 244 : auto poSchemaResult = arrow::ImportSchema(&lSchema);
2602 122 : CPLAssert(lSchema.release == nullptr);
2603 122 : if (!poSchemaResult.ok())
2604 : {
2605 0 : CPLError(CE_Failure, CPLE_AppDefined, "ImportSchema() failed with %s",
2606 0 : poSchemaResult.status().message().c_str());
2607 0 : return false;
2608 : }
2609 244 : auto poSchema = *poSchemaResult;
2610 :
2611 : // Hack the array to use the new children we've computed above
2612 : // but make sure the original release() callback sees the original children
2613 : struct ArrayReleaser
2614 : {
2615 : struct ArrowArray ori_array
2616 : {
2617 : };
2618 :
2619 122 : explicit ArrayReleaser(struct ArrowArray *array)
2620 122 : {
2621 122 : memcpy(&ori_array, array, sizeof(*array));
2622 122 : array->release = ArrayReleaser::release;
2623 122 : array->private_data = this;
2624 122 : }
2625 :
2626 122 : static void release(struct ArrowArray *array)
2627 : {
2628 122 : struct ArrayReleaser *releaser =
2629 : static_cast<struct ArrayReleaser *>(array->private_data);
2630 122 : memcpy(array, &(releaser->ori_array), sizeof(*array));
2631 122 : CPLAssert(array->release != nullptr);
2632 122 : array->release(array);
2633 122 : CPLAssert(array->release == nullptr);
2634 122 : delete releaser;
2635 122 : }
2636 : };
2637 :
2638 : // Must be allocated on the heap, since ArrayReleaser::release() will be
2639 : // called after this method has ended.
2640 122 : ArrayReleaser *releaser = new ArrayReleaser(array);
2641 122 : array->private_data = releaser;
2642 122 : array->n_children = newArrayChildren.size();
2643 : // cppcheck-suppress autoVariables
2644 122 : array->children = newArrayChildren.data();
2645 :
2646 : // Process geometry columns:
2647 : // - if the output encoding is WKB, then just note the geometry type and
2648 : // envelope.
2649 : // - otherwise convert to the output encoding.
2650 122 : int nBuilderIdx = 0;
2651 122 : if (!m_osFIDColumn.empty())
2652 : {
2653 3 : nBuilderIdx++;
2654 : }
2655 : std::map<std::string, std::shared_ptr<arrow::Array>>
2656 244 : oMapGeomFieldNameToArray;
2657 244 : for (int i = 0; i < nGeomFieldCount; ++i, ++nBuilderIdx)
2658 : {
2659 : const char *pszThisGeomFieldName =
2660 122 : m_poFeatureDefn->GetGeomFieldDefn(i)->GetNameRef();
2661 122 : int nIdx = poSchema->GetFieldIndex(pszThisGeomFieldName);
2662 122 : if (nIdx < 0)
2663 : {
2664 4 : if (nGeomFieldCount == 1)
2665 4 : nIdx = poSchema->GetFieldIndex(pszSingleGeomFieldName);
2666 4 : if (nIdx < 0)
2667 : {
2668 0 : CPLError(CE_Failure, CPLE_AppDefined,
2669 : "Cannot find geometry field '%s' in schema",
2670 : pszThisGeomFieldName);
2671 0 : return false;
2672 : }
2673 : }
2674 :
2675 122 : if (strcmp(lSchema.children[nIdx]->format, "z") != 0 &&
2676 1 : strcmp(lSchema.children[nIdx]->format, "Z") != 0)
2677 : {
2678 0 : CPLError(CE_Failure, CPLE_AppDefined,
2679 : "Type of geometry field '%s' is not binary, but '%s'",
2680 0 : pszThisGeomFieldName, lSchema.children[nIdx]->format);
2681 0 : return false;
2682 : }
2683 :
2684 122 : const auto psGeomArray = array->children[nIdx];
2685 122 : const uint8_t *pabyValidity =
2686 122 : psGeomArray->null_count != 0
2687 122 : ? static_cast<const uint8_t *>(psGeomArray->buffers[0])
2688 : : nullptr;
2689 122 : const bool bUseOffsets32 =
2690 122 : (strcmp(lSchema.children[nIdx]->format, "z") == 0);
2691 122 : const uint32_t *panOffsets32 =
2692 122 : static_cast<const uint32_t *>(psGeomArray->buffers[1]) +
2693 122 : psGeomArray->offset;
2694 122 : const uint64_t *panOffsets64 =
2695 122 : static_cast<const uint64_t *>(psGeomArray->buffers[1]) +
2696 122 : psGeomArray->offset;
2697 122 : GByte *pabyData =
2698 122 : static_cast<GByte *>(const_cast<void *>(psGeomArray->buffers[2]));
2699 122 : OGREnvelope sEnvelope;
2700 122 : auto poBuilder = m_apoBuilders[nBuilderIdx].get();
2701 :
2702 667 : for (size_t iRow = 0; iRow < static_cast<size_t>(psGeomArray->length);
2703 : ++iRow)
2704 : {
2705 545 : bool bValidGeom = false;
2706 :
2707 1043 : if (!pabyValidity ||
2708 498 : TestBit(pabyValidity, iRow + psGeomArray->offset))
2709 : {
2710 431 : const auto nLen =
2711 431 : bUseOffsets32 ? static_cast<size_t>(panOffsets32[iRow + 1] -
2712 421 : panOffsets32[iRow])
2713 10 : : static_cast<size_t>(panOffsets64[iRow + 1] -
2714 10 : panOffsets64[iRow]);
2715 431 : GByte *pabyWkb =
2716 431 : pabyData + (bUseOffsets32
2717 421 : ? panOffsets32[iRow]
2718 10 : : static_cast<size_t>(panOffsets64[iRow]));
2719 431 : if (m_aeGeomEncoding[i] == OGRArrowGeomEncoding::WKB)
2720 : {
2721 163 : FixupWKBGeometryBeforeWriting(pabyWkb, nLen);
2722 :
2723 163 : uint32_t nType = 0;
2724 163 : bool bNeedSwap = false;
2725 163 : if (OGRWKBGetGeomType(pabyWkb, nLen, bNeedSwap, nType))
2726 : {
2727 163 : m_oSetWrittenGeometryTypes[i].insert(
2728 163 : static_cast<OGRwkbGeometryType>(nType));
2729 163 : if (OGRWKBGetBoundingBox(pabyWkb, nLen, sEnvelope))
2730 : {
2731 163 : bValidGeom = true;
2732 163 : m_aoEnvelopes[i].Merge(sEnvelope);
2733 :
2734 163 : if (m_bWriteBBoxStruct)
2735 : {
2736 25 : aadfMinX[i].push_back(
2737 25 : castToFloatDown(sEnvelope.MinX));
2738 25 : aadfMinY[i].push_back(
2739 25 : castToFloatDown(sEnvelope.MinY));
2740 25 : aadfMaxX[i].push_back(
2741 25 : castToFloatUp(sEnvelope.MaxX));
2742 25 : aadfMaxY[i].push_back(
2743 25 : castToFloatUp(sEnvelope.MaxY));
2744 : }
2745 : }
2746 : }
2747 : }
2748 : else
2749 : {
2750 268 : size_t nBytesConsumedOut = 0;
2751 268 : OGRGeometry *poGeometry = nullptr;
2752 268 : OGRGeometryFactory::createFromWkb(
2753 : pabyWkb, nullptr, &poGeometry, nLen, wkbVariantIso,
2754 : nBytesConsumedOut);
2755 268 : if (BuildGeometry(poGeometry, i, poBuilder) != OGRERR_NONE)
2756 : {
2757 0 : delete poGeometry;
2758 0 : return false;
2759 : }
2760 268 : bValidGeom = true;
2761 268 : if (m_bWriteBBoxStruct)
2762 : {
2763 0 : poGeometry->getEnvelope(&sEnvelope);
2764 0 : aadfMinX[i].push_back(castToFloatDown(sEnvelope.MinX));
2765 0 : aadfMinY[i].push_back(castToFloatDown(sEnvelope.MinY));
2766 0 : aadfMaxX[i].push_back(castToFloatUp(sEnvelope.MaxX));
2767 0 : aadfMaxY[i].push_back(castToFloatUp(sEnvelope.MaxY));
2768 : }
2769 268 : delete poGeometry;
2770 : }
2771 : }
2772 : else
2773 : {
2774 114 : if (m_aeGeomEncoding[i] != OGRArrowGeomEncoding::WKB)
2775 : {
2776 81 : if (BuildGeometry(nullptr, i, poBuilder) != OGRERR_NONE)
2777 0 : return false;
2778 : }
2779 : }
2780 :
2781 545 : if (!bValidGeom && m_bWriteBBoxStruct)
2782 : {
2783 4 : if ((bboxStructSchema[i].flags & ARROW_FLAG_NULLABLE))
2784 : {
2785 4 : bboxStructArray[i].null_count++;
2786 4 : aabyBboxStructValidity[i][iRow / 8] &=
2787 4 : ~(1 << static_cast<int>(iRow % 8));
2788 : }
2789 4 : aadfMinX[i].push_back(0.0f);
2790 4 : aadfMinY[i].push_back(0.0f);
2791 4 : aadfMaxX[i].push_back(0.0f);
2792 4 : aadfMaxY[i].push_back(0.0f);
2793 : }
2794 : }
2795 :
2796 122 : if (m_aeGeomEncoding[i] != OGRArrowGeomEncoding::WKB)
2797 : {
2798 0 : std::shared_ptr<arrow::Array> geomArray;
2799 81 : auto status = poBuilder->Finish(&geomArray);
2800 81 : if (!status.ok())
2801 : {
2802 0 : CPLError(CE_Failure, CPLE_AppDefined,
2803 : "builder::Finish() for field %s failed with %s",
2804 0 : pszThisGeomFieldName, status.message().c_str());
2805 0 : return false;
2806 : }
2807 162 : oMapGeomFieldNameToArray[pszThisGeomFieldName] =
2808 162 : std::move(geomArray);
2809 : }
2810 : }
2811 :
2812 : auto poRecordBatchResult =
2813 244 : arrow::ImportRecordBatch(array, std::move(poSchema));
2814 122 : if (!poRecordBatchResult.ok())
2815 : {
2816 0 : CPLError(CE_Failure, CPLE_AppDefined,
2817 : "ImportRecordBatch() failed with %s",
2818 0 : poRecordBatchResult.status().message().c_str());
2819 0 : return false;
2820 : }
2821 244 : auto poRecordBatch = *poRecordBatchResult;
2822 :
2823 : // below assertion commented out since it is not strictly necessary, but
2824 : // reflects what ImportRecordBatch() does.
2825 : // CPLAssert(array->release == nullptr);
2826 :
2827 : // We may need to reconstruct a final record batch that perfectly matches
2828 : // the expected schema.
2829 122 : if (bRebuildBatch || !oMapGeomFieldNameToArray.empty())
2830 : {
2831 85 : std::vector<std::shared_ptr<arrow::Array>> apoArrays;
2832 668 : for (int i = 0; i < m_poSchema->num_fields(); ++i)
2833 : {
2834 : auto oIter =
2835 583 : oMapGeomFieldNameToArray.find(m_poSchema->field(i)->name());
2836 583 : if (oIter != oMapGeomFieldNameToArray.end())
2837 81 : apoArrays.emplace_back(oIter->second);
2838 : else
2839 502 : apoArrays.emplace_back(poRecordBatch->column(i));
2840 583 : if (apoArrays.back()->type()->id() !=
2841 583 : m_poSchema->field(i)->type()->id())
2842 : {
2843 0 : CPLError(CE_Failure, CPLE_AppDefined,
2844 : "Field '%s' of unexpected type",
2845 0 : m_poSchema->field(i)->name().c_str());
2846 0 : return false;
2847 : }
2848 : }
2849 255 : poRecordBatchResult = arrow::RecordBatch::Make(
2850 255 : m_poSchema, poRecordBatch->num_rows(), std::move(apoArrays));
2851 85 : if (!poRecordBatchResult.ok())
2852 : {
2853 0 : CPLError(CE_Failure, CPLE_AppDefined,
2854 : "RecordBatch::Make() failed with %s",
2855 0 : poRecordBatchResult.status().message().c_str());
2856 0 : return false;
2857 : }
2858 85 : poRecordBatch = *poRecordBatchResult;
2859 : }
2860 :
2861 122 : if (writeBatch(poRecordBatch))
2862 : {
2863 122 : m_nFeatureCount += poRecordBatch->num_rows();
2864 122 : return true;
2865 : }
2866 0 : return false;
2867 : }
2868 :
2869 : #endif /* OGARROWWRITERLAYER_HPP_INCLUDED */
|