Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: Arrow generic code
4 : * Purpose: Arrow generic code
5 : * Author: Even Rouault, <even.rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022, Planet Labs
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #ifndef OGARROWWRITERLAYER_HPP_INCLUDED
14 : #define OGARROWWRITERLAYER_HPP_INCLUDED
15 :
16 : #include "ogr_arrow.h"
17 :
18 : #include "cpl_json.h"
19 : #include "cpl_time.h"
20 :
21 : #include "ogrlayerarrow.h"
22 : #include "ogr_wkb.h"
23 :
24 : #include <array>
25 : #include <cinttypes>
26 : #include <limits>
27 :
// Sentinel stored in m_anTZFlag for fields whose timezone flag has not been
// determined yet.
static constexpr int TZFLAG_UNINITIALIZED = -1;

// If `status` is not OK, emits a CPLError() with the status message and
// returns `ret_value` from the enclosing function.
// `status` is evaluated exactly once, so it is safe to pass an expression
// with side effects (e.g. a builder Append() call): the status that is tested
// is also the one whose message is reported.
#define OGR_ARROW_RETURN_NOT_OK(status, ret_value)                             \
    do                                                                         \
    {                                                                          \
        const auto &ogrArrowStatus_ = (status);                                \
        if (!ogrArrowStatus_.ok())                                             \
        {                                                                      \
            CPLError(CE_Failure, CPLE_AppDefined, "%s failed",                 \
                     ogrArrowStatus_.message().c_str());                       \
            return (ret_value);                                                \
        }                                                                      \
    } while (false)

// Same as OGR_ARROW_RETURN_NOT_OK(), returning false on failure.
#define OGR_ARROW_RETURN_FALSE_NOT_OK(status)                                  \
    OGR_ARROW_RETURN_NOT_OK(status, false)

// Same as OGR_ARROW_RETURN_NOT_OK(), returning OGRERR_FAILURE on failure.
#define OGR_ARROW_RETURN_OGRERR_NOT_OK(status)                                 \
    OGR_ARROW_RETURN_NOT_OK(status, OGRERR_FAILURE)

// Returns OGRERR_FAILURE from the enclosing function if `ret_value` is not
// OGRERR_NONE. The specific error code is intentionally collapsed into
// OGRERR_FAILURE.
#define OGR_ARROW_PROPAGATE_OGRERR(ret_value)                                  \
    do                                                                         \
    {                                                                          \
        if ((ret_value) != OGRERR_NONE)                                        \
            return OGRERR_FAILURE;                                             \
    } while (false)
53 :
54 : /************************************************************************/
55 : /* OGRArrowWriterLayer() */
56 : /************************************************************************/
57 :
58 516 : inline OGRArrowWriterLayer::OGRArrowWriterLayer(
59 : arrow::MemoryPool *poMemoryPool,
60 : const std::shared_ptr<arrow::io::OutputStream> &poOutputStream,
61 516 : const char *pszLayerName)
62 516 : : m_poMemoryPool(poMemoryPool), m_poOutputStream(poOutputStream)
63 : {
64 516 : m_poFeatureDefn = new OGRFeatureDefn(pszLayerName);
65 516 : m_poFeatureDefn->SetGeomType(wkbNone);
66 516 : m_poFeatureDefn->Reference();
67 516 : SetDescription(pszLayerName);
68 516 : }
69 :
70 : /************************************************************************/
71 : /* ~OGRArrowWriterLayer() */
72 : /************************************************************************/
73 :
74 516 : inline OGRArrowWriterLayer::~OGRArrowWriterLayer()
75 : {
76 516 : CPLDebug("ARROW", "Memory pool (writer layer): bytes_allocated = %" PRId64,
77 516 : m_poMemoryPool->bytes_allocated());
78 516 : CPLDebug("ARROW", "Memory pool (writer layer): max_memory = %" PRId64,
79 516 : m_poMemoryPool->max_memory());
80 :
81 516 : m_poFeatureDefn->Release();
82 516 : }
83 :
84 : /************************************************************************/
85 : /* FinalizeWriting() */
86 : /************************************************************************/
87 :
88 504 : inline bool OGRArrowWriterLayer::FinalizeWriting()
89 : {
90 504 : bool ret = true;
91 :
92 504 : if (!IsFileWriterCreated())
93 : {
94 363 : CreateWriter();
95 : }
96 504 : if (IsFileWriterCreated())
97 : {
98 504 : PerformStepsBeforeFinalFlushGroup();
99 :
100 504 : if (!m_apoBuilders.empty() && m_apoFieldsFromArrowSchema.empty())
101 314 : ret = FlushGroup();
102 :
103 504 : if (!CloseFileWriter())
104 0 : ret = false;
105 : }
106 :
107 504 : return ret;
108 : }
109 :
110 : /************************************************************************/
111 : /* RemoveIDFromMemberOfEnsembles() */
112 : /************************************************************************/
113 :
114 : /* static */
115 : inline void
116 704 : OGRArrowWriterLayer::RemoveIDFromMemberOfEnsembles(CPLJSONObject &obj)
117 : {
118 : // Remove "id" from members of datum ensembles for compatibility with
119 : // older PROJ versions
120 : // Cf https://github.com/opengeospatial/geoparquet/discussions/110
121 : // and https://github.com/OSGeo/PROJ/pull/3221
122 704 : if (obj.GetType() == CPLJSONObject::Type::Object)
123 : {
124 898 : for (auto &subObj : obj.GetChildren())
125 : {
126 684 : RemoveIDFromMemberOfEnsembles(subObj);
127 : }
128 : }
129 542 : else if (obj.GetType() == CPLJSONObject::Type::Array &&
130 542 : obj.GetName() == "members")
131 : {
132 0 : for (auto &subObj : obj.ToArray())
133 : {
134 0 : subObj.Delete("id");
135 : }
136 : }
137 704 : }
138 :
139 : /************************************************************************/
140 : /* IdentifyCRS() */
141 : /************************************************************************/
142 :
143 : /* static */
144 : inline OGRSpatialReference
145 41 : OGRArrowWriterLayer::IdentifyCRS(const OGRSpatialReference *poSRS)
146 : {
147 41 : OGRSpatialReference oSRSIdentified(*poSRS);
148 :
149 41 : if (poSRS->GetAuthorityName(nullptr) == nullptr)
150 : {
151 : // Try to find a registered CRS that matches the input one
152 4 : int nEntries = 0;
153 4 : int *panConfidence = nullptr;
154 : OGRSpatialReferenceH *pahSRS =
155 4 : poSRS->FindMatches(nullptr, &nEntries, &panConfidence);
156 :
157 : // If there are several matches >= 90%, take the only one
158 : // that is EPSG
159 4 : int iOtherAuthority = -1;
160 4 : int iEPSG = -1;
161 4 : const char *const apszOptions[] = {
162 : "IGNORE_DATA_AXIS_TO_SRS_AXIS_MAPPING=YES", nullptr};
163 4 : int iConfidenceBestMatch = -1;
164 6 : for (int iSRS = 0; iSRS < nEntries; iSRS++)
165 : {
166 4 : auto poCandidateCRS = OGRSpatialReference::FromHandle(pahSRS[iSRS]);
167 4 : if (panConfidence[iSRS] < iConfidenceBestMatch ||
168 4 : panConfidence[iSRS] < 70)
169 : {
170 : break;
171 : }
172 3 : if (poSRS->IsSame(poCandidateCRS, apszOptions))
173 : {
174 : const char *pszAuthName =
175 3 : poCandidateCRS->GetAuthorityName(nullptr);
176 3 : if (pszAuthName != nullptr && EQUAL(pszAuthName, "EPSG"))
177 : {
178 2 : iOtherAuthority = -2;
179 2 : if (iEPSG < 0)
180 : {
181 2 : iConfidenceBestMatch = panConfidence[iSRS];
182 2 : iEPSG = iSRS;
183 : }
184 : else
185 : {
186 0 : iEPSG = -1;
187 0 : break;
188 : }
189 : }
190 1 : else if (iEPSG < 0 && pszAuthName != nullptr)
191 : {
192 1 : if (EQUAL(pszAuthName, "OGC"))
193 : {
194 : const char *pszAuthCode =
195 1 : poCandidateCRS->GetAuthorityCode(nullptr);
196 1 : if (pszAuthCode && EQUAL(pszAuthCode, "CRS84"))
197 : {
198 1 : iOtherAuthority = iSRS;
199 1 : break;
200 : }
201 : }
202 0 : else if (iOtherAuthority == -1)
203 : {
204 0 : iConfidenceBestMatch = panConfidence[iSRS];
205 0 : iOtherAuthority = iSRS;
206 : }
207 : else
208 0 : iOtherAuthority = -2;
209 : }
210 : }
211 : }
212 4 : if (iEPSG >= 0)
213 : {
214 2 : oSRSIdentified = *OGRSpatialReference::FromHandle(pahSRS[iEPSG]);
215 : }
216 2 : else if (iOtherAuthority >= 0)
217 : {
218 : oSRSIdentified =
219 1 : *OGRSpatialReference::FromHandle(pahSRS[iOtherAuthority]);
220 : }
221 4 : OSRFreeSRSArray(pahSRS);
222 4 : CPLFree(panConfidence);
223 : }
224 :
225 41 : return oSRSIdentified;
226 : }
227 :
228 : /************************************************************************/
229 : /* CreateSchemaCommon() */
230 : /************************************************************************/
231 :
232 504 : inline void OGRArrowWriterLayer::CreateSchemaCommon()
233 : {
234 504 : CPLAssert(static_cast<int>(m_aeGeomEncoding.size()) ==
235 : m_poFeatureDefn->GetGeomFieldCount());
236 :
237 1008 : std::vector<std::shared_ptr<arrow::Field>> fields;
238 504 : bool bNeedGDALSchema = false;
239 :
240 504 : m_anTZFlag.resize(m_poFeatureDefn->GetFieldCount(), TZFLAG_UNINITIALIZED);
241 :
242 504 : if (!m_osFIDColumn.empty())
243 : {
244 21 : bNeedGDALSchema = true;
245 21 : fields.emplace_back(arrow::field(m_osFIDColumn, arrow::int64(), false));
246 : }
247 :
248 504 : if (!m_apoFieldsFromArrowSchema.empty())
249 : {
250 129 : fields.insert(fields.end(), m_apoFieldsFromArrowSchema.begin(),
251 258 : m_apoFieldsFromArrowSchema.end());
252 : }
253 :
254 1272 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i)
255 : {
256 768 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
257 768 : std::shared_ptr<arrow::DataType> dt;
258 768 : const auto eDT = poFieldDefn->GetType();
259 768 : const auto eSubDT = poFieldDefn->GetSubType();
260 768 : const auto &osDomainName = poFieldDefn->GetDomainName();
261 768 : const OGRFieldDomain *poFieldDomain = nullptr;
262 768 : const int nWidth = poFieldDefn->GetWidth();
263 768 : if (!osDomainName.empty())
264 : {
265 4 : const auto oIter = m_oMapFieldDomains.find(osDomainName);
266 4 : if (oIter == m_oMapFieldDomains.end())
267 : {
268 0 : CPLError(CE_Warning, CPLE_AppDefined,
269 : "Field %s references domain %s, but the later one "
270 : "has not been created",
271 : poFieldDefn->GetNameRef(), osDomainName.c_str());
272 : }
273 : else
274 : {
275 4 : poFieldDomain = oIter->second.get();
276 : }
277 : }
278 768 : const char *pszFieldMetadata = nullptr;
279 768 : switch (eDT)
280 : {
281 81 : case OFTInteger:
282 81 : if (eSubDT == OFSTBoolean)
283 4 : dt = arrow::boolean();
284 77 : else if (eSubDT == OFSTInt16)
285 4 : dt = arrow::int16();
286 : else
287 73 : dt = arrow::int32();
288 81 : if (poFieldDomain != nullptr)
289 : {
290 4 : dt = arrow::dictionary(dt, arrow::utf8());
291 : }
292 81 : break;
293 :
294 51 : case OFTInteger64:
295 51 : dt = arrow::int64();
296 51 : if (poFieldDomain != nullptr)
297 : {
298 0 : dt = arrow::dictionary(dt, arrow::utf8());
299 : }
300 51 : break;
301 :
302 68 : case OFTReal:
303 : {
304 68 : const int nPrecision = poFieldDefn->GetPrecision();
305 68 : if (nWidth != 0 && nPrecision != 0)
306 : {
307 : // Since arrow 18.0, we could use arrow::smallest_decimal()
308 : // to return the smallest representation (i.e. possibly
309 : // decimal32 and decimal64). But for now keep decimal128
310 : // as the minimum for backwards compatibility.
311 : // GetValueDecimal() and other functions in
312 : // ogrlayerarrow.cpp would have to be adapted for decimal32
313 : // and decimal64 compatibility.
314 11 : if (nWidth > 38)
315 0 : dt = arrow::decimal256(nWidth, nPrecision);
316 : else
317 11 : dt = arrow::decimal128(nWidth, nPrecision);
318 : }
319 57 : else if (eSubDT == OFSTFloat32)
320 7 : dt = arrow::float32();
321 : else
322 50 : dt = arrow::float64();
323 68 : break;
324 : }
325 :
326 343 : case OFTString:
327 : case OFTWideString:
328 343 : if ((eSubDT != OFSTNone && eSubDT != OFSTJSON) || nWidth > 0)
329 3 : bNeedGDALSchema = true;
330 343 : if (eSubDT == OFSTJSON)
331 83 : pszFieldMetadata = EXTENSION_NAME_ARROW_JSON;
332 343 : dt = arrow::utf8();
333 343 : break;
334 :
335 19 : case OFTBinary:
336 19 : if (nWidth != 0)
337 4 : dt = arrow::fixed_size_binary(nWidth);
338 : else
339 15 : dt = arrow::binary();
340 19 : break;
341 :
342 48 : case OFTIntegerList:
343 48 : if (eSubDT == OFSTBoolean)
344 8 : dt = arrow::list(arrow::boolean());
345 40 : else if (eSubDT == OFSTInt16)
346 0 : dt = arrow::list(arrow::int16());
347 : else
348 40 : dt = arrow::list(arrow::int32());
349 48 : break;
350 :
351 20 : case OFTInteger64List:
352 20 : dt = arrow::list(arrow::int64());
353 20 : break;
354 :
355 35 : case OFTRealList:
356 35 : if (eSubDT == OFSTFloat32)
357 11 : dt = arrow::list(arrow::float32());
358 : else
359 24 : dt = arrow::list(arrow::float64());
360 35 : break;
361 :
362 12 : case OFTStringList:
363 : case OFTWideStringList:
364 12 : dt = arrow::list(arrow::utf8());
365 12 : break;
366 :
367 31 : case OFTDate:
368 31 : dt = arrow::date32();
369 31 : break;
370 :
371 8 : case OFTTime:
372 8 : dt = arrow::time32(arrow::TimeUnit::MILLI);
373 8 : break;
374 :
375 52 : case OFTDateTime:
376 : {
377 52 : const int nTZFlag = poFieldDefn->GetTZFlag();
378 : const char *pszTIMESTAMP_WITH_OFFSET =
379 52 : m_aosCreationOptions.FetchNameValueDef(
380 : "TIMESTAMP_WITH_OFFSET", "AUTO");
381 52 : if ((nTZFlag == OGR_TZFLAG_MIXED_TZ &&
382 5 : !EQUAL(pszTIMESTAMP_WITH_OFFSET, "NO")) ||
383 48 : EQUAL(pszTIMESTAMP_WITH_OFFSET, "YES"))
384 : {
385 4 : m_anTZFlag[i] = nTZFlag;
386 : std::vector<std::shared_ptr<arrow::Field>>
387 : tsWithOffsetFields{
388 : arrow::field(
389 : ATSWO_TIMESTAMP_FIELD_NAME,
390 8 : arrow::timestamp(arrow::TimeUnit::MILLI, "UTC"),
391 : false),
392 : arrow::field(ATSWO_OFFSET_MINUTES_FIELD_NAME,
393 24 : arrow::int16(), false)};
394 4 : dt = arrow::struct_(std::move(tsWithOffsetFields));
395 4 : pszFieldMetadata =
396 4 : EXTENSION_NAME_ARROW_TIMESTAMP_WITH_OFFSET;
397 : }
398 : else
399 : {
400 48 : if (nTZFlag >= OGR_TZFLAG_MIXED_TZ)
401 : {
402 13 : m_anTZFlag[i] = nTZFlag;
403 : }
404 48 : dt = arrow::timestamp(arrow::TimeUnit::MILLI);
405 : }
406 52 : break;
407 : }
408 : }
409 :
410 768 : auto field = arrow::field(poFieldDefn->GetNameRef(), std::move(dt),
411 2304 : poFieldDefn->IsNullable());
412 768 : if (pszFieldMetadata)
413 : {
414 87 : auto kvMetadata = std::make_shared<arrow::KeyValueMetadata>();
415 87 : kvMetadata->Append(ARROW_EXTENSION_NAME_KEY, pszFieldMetadata);
416 87 : field = field->WithMetadata(kvMetadata);
417 : }
418 :
419 768 : fields.emplace_back(std::move(field));
420 768 : if (poFieldDefn->GetAlternativeNameRef()[0])
421 2 : bNeedGDALSchema = true;
422 768 : if (!poFieldDefn->GetComment().empty())
423 3 : bNeedGDALSchema = true;
424 : }
425 :
426 999 : for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i)
427 : {
428 495 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
429 495 : const auto eGType = poGeomFieldDefn->GetType();
430 : const int nDim =
431 495 : 2 + (OGR_GT_HasZ(eGType) ? 1 : 0) + (OGR_GT_HasM(eGType) ? 1 : 0);
432 :
433 495 : const bool pointFieldNullable = GetDriverUCName() == "PARQUET";
434 :
435 : // Fixed Size List GeoArrow encoding
436 : const auto getFixedSizeListOfPoint =
437 228 : [nDim, eGType, pointFieldNullable]()
438 : {
439 : return arrow::fixed_size_list(
440 146 : arrow::field(nDim == 2 ? "xy"
441 30 : : nDim == 3 ? (OGR_GT_HasZ(eGType) ? "xyz" : "xym")
442 : : "xyzm",
443 : arrow::float64(), pointFieldNullable),
444 116 : nDim);
445 495 : };
446 :
447 : // Struct GeoArrow encoding
448 1485 : auto xField(arrow::field("x", arrow::float64(), false));
449 1485 : auto yField(arrow::field("y", arrow::float64(), false));
450 : std::vector<std::shared_ptr<arrow::Field>> pointFields{
451 : arrow::field("x", arrow::float64(), false),
452 2970 : arrow::field("y", arrow::float64(), false)};
453 495 : if (OGR_GT_HasZ(eGType))
454 : pointFields.emplace_back(
455 145 : arrow::field("z", arrow::float64(), false));
456 495 : if (OGR_GT_HasM(eGType))
457 : pointFields.emplace_back(
458 52 : arrow::field("m", arrow::float64(), false));
459 990 : auto pointStructType(arrow::struct_(std::move(pointFields)));
460 :
461 40 : const auto getListOfVertices = [&getFixedSizeListOfPoint]()
462 : {
463 80 : return arrow::list(std::make_shared<arrow::Field>(
464 120 : "vertices", getFixedSizeListOfPoint()));
465 495 : };
466 :
467 22 : const auto getListOfRings = [&getListOfVertices]()
468 : {
469 : return arrow::list(
470 44 : std::make_shared<arrow::Field>("rings", getListOfVertices()));
471 495 : };
472 :
473 146 : const auto getListOfVerticesStruct = [&pointStructType]()
474 : {
475 : return arrow::list(
476 292 : std::make_shared<arrow::Field>("vertices", pointStructType));
477 495 : };
478 :
479 86 : const auto getListOfRingsStruct = [&getListOfVerticesStruct]()
480 : {
481 172 : return arrow::list(std::make_shared<arrow::Field>(
482 258 : "rings", getListOfVerticesStruct()));
483 495 : };
484 :
485 495 : std::shared_ptr<arrow::DataType> dt;
486 495 : switch (m_aeGeomEncoding[i])
487 : {
488 169 : case OGRArrowGeomEncoding::WKB:
489 : #if ARROW_VERSION_MAJOR >= 21
490 : if (m_bUseArrowWKBExtension)
491 : {
492 : CPLJSONDocument oMetadataDoc;
493 :
494 : const auto poSRS = poGeomFieldDefn->GetSpatialRef();
495 : if (poSRS)
496 : {
497 : OGRSpatialReference oSRSIdentified(IdentifyCRS(poSRS));
498 :
499 : // CRS encoded as PROJJSON
500 : char *pszPROJJSON = nullptr;
501 : oSRSIdentified.exportToPROJJSON(&pszPROJJSON, nullptr);
502 : CPLJSONDocument oCRSDoc;
503 : CPL_IGNORE_RET_VAL(oCRSDoc.LoadMemory(pszPROJJSON));
504 : CPLFree(pszPROJJSON);
505 : CPLJSONObject oCRSRoot = oCRSDoc.GetRoot();
506 : RemoveIDFromMemberOfEnsembles(oCRSRoot);
507 :
508 : oMetadataDoc.GetRoot().Add("crs", oCRSRoot);
509 : }
510 :
511 : if (m_bEdgesSpherical)
512 : {
513 : oMetadataDoc.GetRoot().Add("edges", "spherical");
514 : }
515 :
516 : const std::string metadata = oMetadataDoc.GetRoot().Format(
517 : CPLJSONObject::PrettyFormat::Plain);
518 : dt = std::make_shared<OGRGeoArrowWkbExtensionType>(
519 : arrow::binary(), metadata);
520 : }
521 : else
522 : #endif
523 : {
524 169 : dt = arrow::binary();
525 : }
526 169 : break;
527 :
528 53 : case OGRArrowGeomEncoding::WKT:
529 53 : dt = arrow::utf8();
530 53 : break;
531 :
532 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
533 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
534 0 : CPLAssert(false);
535 : break;
536 :
537 9 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
538 9 : dt = getFixedSizeListOfPoint();
539 9 : break;
540 :
541 9 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
542 9 : dt = getListOfVertices();
543 9 : break;
544 :
545 11 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
546 11 : dt = getListOfRings();
547 11 : break;
548 :
549 9 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
550 18 : dt = arrow::list(std::make_shared<arrow::Field>(
551 27 : "points", getFixedSizeListOfPoint()));
552 9 : break;
553 :
554 9 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
555 18 : dt = arrow::list(std::make_shared<arrow::Field>(
556 27 : "linestrings", getListOfVertices()));
557 9 : break;
558 :
559 11 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
560 22 : dt = arrow::list(std::make_shared<arrow::Field>(
561 33 : "polygons", getListOfRings()));
562 11 : break;
563 :
564 39 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
565 39 : dt = pointStructType;
566 39 : break;
567 :
568 30 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
569 30 : dt = getListOfVerticesStruct();
570 30 : break;
571 :
572 44 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
573 44 : dt = getListOfRingsStruct();
574 44 : break;
575 :
576 30 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
577 60 : dt = arrow::list(
578 90 : std::make_shared<arrow::Field>("points", pointStructType));
579 30 : break;
580 :
581 30 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
582 60 : dt = arrow::list(std::make_shared<arrow::Field>(
583 90 : "linestrings", getListOfVerticesStruct()));
584 30 : break;
585 :
586 42 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
587 84 : dt = arrow::list(std::make_shared<arrow::Field>(
588 126 : "polygons", getListOfRingsStruct()));
589 42 : break;
590 : }
591 :
592 : std::shared_ptr<arrow::Field> field(
593 495 : arrow::field(poGeomFieldDefn->GetNameRef(), std::move(dt),
594 1485 : poGeomFieldDefn->IsNullable()));
595 495 : if (m_bWriteFieldArrowExtensionName)
596 : {
597 138 : auto kvMetadata = field->metadata()
598 138 : ? field->metadata()->Copy()
599 138 : : std::make_shared<arrow::KeyValueMetadata>();
600 276 : kvMetadata->Append(
601 : ARROW_EXTENSION_NAME_KEY,
602 138 : GetGeomEncodingAsString(m_aeGeomEncoding[i], false));
603 138 : field = field->WithMetadata(kvMetadata);
604 : }
605 :
606 495 : m_apoBaseStructGeomType.emplace_back(std::move(pointStructType));
607 :
608 495 : fields.emplace_back(std::move(field));
609 : }
610 :
611 504 : if (m_bWriteBBoxStruct)
612 : {
613 579 : for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i)
614 : {
615 288 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
616 864 : auto bbox_field_xmin(arrow::field("xmin", arrow::float32(), false));
617 864 : auto bbox_field_ymin(arrow::field("ymin", arrow::float32(), false));
618 864 : auto bbox_field_xmax(arrow::field("xmax", arrow::float32(), false));
619 864 : auto bbox_field_ymax(arrow::field("ymax", arrow::float32(), false));
620 : auto bbox_field(arrow::field(
621 288 : m_oBBoxStructFieldName.empty()
622 963 : ? std::string(poGeomFieldDefn->GetNameRef()).append("_bbox")
623 : : m_oBBoxStructFieldName,
624 1728 : arrow::struct_(
625 288 : {std::move(bbox_field_xmin), std::move(bbox_field_ymin),
626 1728 : std::move(bbox_field_xmax), std::move(bbox_field_ymax)}),
627 1602 : poGeomFieldDefn->IsNullable()));
628 288 : fields.emplace_back(bbox_field);
629 288 : m_apoFieldsBBOX.emplace_back(bbox_field);
630 : }
631 : }
632 :
633 504 : m_aoEnvelopes.resize(m_poFeatureDefn->GetGeomFieldCount());
634 504 : m_oSetWrittenGeometryTypes.resize(m_poFeatureDefn->GetGeomFieldCount());
635 :
636 504 : m_poSchema = arrow::schema(std::move(fields));
637 504 : CPLAssert(m_poSchema);
638 531 : if (bNeedGDALSchema &&
639 27 : CPLTestBool(CPLGetConfigOption(
640 531 : ("OGR_" + GetDriverUCName() + "_WRITE_GDAL_SCHEMA").c_str(),
641 : "YES")))
642 : {
643 54 : CPLJSONObject oRoot;
644 54 : CPLJSONObject oColumns;
645 :
646 27 : if (!m_osFIDColumn.empty())
647 21 : oRoot.Add("fid", m_osFIDColumn);
648 :
649 27 : oRoot.Add("columns", oColumns);
650 221 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i)
651 : {
652 194 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
653 388 : CPLJSONObject oColumn;
654 194 : oColumns.Add(poFieldDefn->GetNameRef(), oColumn);
655 194 : oColumn.Add("type", OGR_GetFieldTypeName(poFieldDefn->GetType()));
656 194 : const auto eSubDT = poFieldDefn->GetSubType();
657 194 : if (eSubDT != OFSTNone)
658 56 : oColumn.Add("subtype", OGR_GetFieldSubTypeName(eSubDT));
659 194 : const int nWidth = poFieldDefn->GetWidth();
660 194 : if (nWidth > 0)
661 15 : oColumn.Add("width", nWidth);
662 194 : const int nPrecision = poFieldDefn->GetPrecision();
663 194 : if (nPrecision > 0)
664 7 : oColumn.Add("precision", nPrecision);
665 194 : if (poFieldDefn->GetAlternativeNameRef()[0])
666 2 : oColumn.Add("alternative_name",
667 : poFieldDefn->GetAlternativeNameRef());
668 194 : if (!poFieldDefn->GetComment().empty())
669 3 : oColumn.Add("comment", poFieldDefn->GetComment());
670 : }
671 :
672 27 : auto kvMetadata = m_poSchema->metadata()
673 0 : ? m_poSchema->metadata()->Copy()
674 54 : : std::make_shared<arrow::KeyValueMetadata>();
675 54 : kvMetadata->Append("gdal:schema",
676 54 : oRoot.Format(CPLJSONObject::PrettyFormat::Plain));
677 27 : m_poSchema = m_poSchema->WithMetadata(kvMetadata);
678 27 : CPLAssert(m_poSchema);
679 : }
680 504 : }
681 :
682 : /************************************************************************/
683 : /* FinalizeSchema() */
684 : /************************************************************************/
685 :
686 446 : inline void OGRArrowWriterLayer::FinalizeSchema()
687 : {
688 : // Final tuning of schema taking into actual timezone values
689 : // from features
690 446 : int nArrowIdxFirstField = !m_osFIDColumn.empty() ? 1 : 0;
691 1210 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i)
692 : {
693 764 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
694 778 : if (m_anTZFlag[i] >= OGR_TZFLAG_MIXED_TZ &&
695 778 : poFieldDefn->GetTZFlag() != OGR_TZFLAG_MIXED_TZ &&
696 12 : m_poSchema->field(nArrowIdxFirstField + i)->type()->id() !=
697 : arrow::Type::STRUCT)
698 : {
699 12 : const int nOffset = m_anTZFlag[i] == OGR_TZFLAG_MIXED_TZ
700 12 : ? 0
701 12 : : (m_anTZFlag[i] - OGR_TZFLAG_UTC) * 15;
702 12 : int nHours = static_cast<int>(nOffset / 60); // Round towards zero.
703 12 : const int nMinutes = std::abs(nOffset - nHours * 60);
704 :
705 : const std::string osTZ =
706 : CPLSPrintf("%c%02d:%02d", nOffset >= 0 ? '+' : '-',
707 24 : std::abs(nHours), nMinutes);
708 24 : auto dt = arrow::timestamp(arrow::TimeUnit::MILLI, osTZ);
709 12 : auto field = arrow::field(poFieldDefn->GetNameRef(), std::move(dt),
710 36 : poFieldDefn->IsNullable());
711 24 : auto result = m_poSchema->SetField(nArrowIdxFirstField + i, field);
712 12 : if (!result.ok())
713 : {
714 0 : CPLError(CE_Warning, CPLE_AppDefined,
715 : "Schema::SetField() failed with %s",
716 0 : result.status().message().c_str());
717 : }
718 : else
719 : {
720 12 : m_poSchema = *result;
721 : }
722 : }
723 : }
724 446 : }
725 :
726 : /************************************************************************/
727 : /* AddFieldDomain() */
728 : /************************************************************************/
729 :
730 : inline bool
731 11 : OGRArrowWriterLayer::AddFieldDomain(std::unique_ptr<OGRFieldDomain> &&domain,
732 : std::string &failureReason)
733 : {
734 11 : if (domain->GetDomainType() != OFDT_CODED)
735 : {
736 0 : failureReason = "Only coded field domains are supported by Arrow";
737 0 : return false;
738 : }
739 :
740 : const OGRCodedFieldDomain *poDomain =
741 11 : static_cast<const OGRCodedFieldDomain *>(domain.get());
742 11 : const OGRCodedValue *psIter = poDomain->GetEnumeration();
743 :
744 : auto poStringBuilder =
745 22 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool);
746 :
747 11 : int nLastCode = -1;
748 44 : for (; psIter->pszCode; ++psIter)
749 : {
750 33 : if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
751 : {
752 0 : failureReason = "Non integer code in domain ";
753 0 : failureReason += domain->GetName();
754 0 : return false;
755 : }
756 33 : int nCode = atoi(psIter->pszCode);
757 33 : if (nCode <= nLastCode || nCode - nLastCode > 100)
758 : {
759 0 : failureReason = "Too sparse codes in domain ";
760 0 : failureReason += domain->GetName();
761 0 : return false;
762 : }
763 33 : for (int i = nLastCode + 1; i < nCode; ++i)
764 : {
765 0 : OGR_ARROW_RETURN_FALSE_NOT_OK(poStringBuilder->AppendNull());
766 : }
767 33 : if (psIter->pszValue)
768 33 : OGR_ARROW_RETURN_FALSE_NOT_OK(
769 : poStringBuilder->Append(psIter->pszValue));
770 : else
771 0 : OGR_ARROW_RETURN_FALSE_NOT_OK(poStringBuilder->AppendNull());
772 33 : nLastCode = nCode;
773 : }
774 :
775 11 : std::shared_ptr<arrow::Array> stringArray;
776 22 : auto status = poStringBuilder->Finish(&stringArray);
777 11 : if (!status.ok())
778 : {
779 0 : CPLError(CE_Failure, CPLE_AppDefined,
780 : "StringArray::Finish() failed with %s",
781 0 : status.message().c_str());
782 0 : return false;
783 : }
784 :
785 11 : m_oMapFieldDomainToStringArray[domain->GetName()] = std::move(stringArray);
786 11 : m_oMapFieldDomains[domain->GetName()] = std::move(domain);
787 11 : return true;
788 : }
789 :
790 : /************************************************************************/
791 : /* GetFieldDomainNames() */
792 : /************************************************************************/
793 :
794 0 : inline std::vector<std::string> OGRArrowWriterLayer::GetFieldDomainNames() const
795 : {
796 0 : std::vector<std::string> names;
797 0 : names.reserve(m_oMapFieldDomains.size());
798 0 : for (const auto &it : m_oMapFieldDomains)
799 : {
800 0 : names.emplace_back(it.first);
801 : }
802 0 : return names;
803 : }
804 :
805 : /************************************************************************/
806 : /* GetFieldDomain() */
807 : /************************************************************************/
808 :
809 : inline const OGRFieldDomain *
810 15 : OGRArrowWriterLayer::GetFieldDomain(const std::string &name) const
811 : {
812 15 : const auto iter = m_oMapFieldDomains.find(name);
813 15 : if (iter == m_oMapFieldDomains.end())
814 11 : return nullptr;
815 4 : return iter->second.get();
816 : }
817 :
818 : /************************************************************************/
819 : /* CreateField() */
820 : /************************************************************************/
821 :
822 769 : inline OGRErr OGRArrowWriterLayer::CreateField(const OGRFieldDefn *poField,
823 : int /* bApproxOK */)
824 : {
825 769 : if (m_poSchema)
826 : {
827 1 : CPLError(CE_Failure, CPLE_NotSupported,
828 : "Cannot add field after a first feature has been written");
829 1 : return OGRERR_FAILURE;
830 : }
831 768 : if (!m_apoFieldsFromArrowSchema.empty())
832 : {
833 0 : CPLError(CE_Failure, CPLE_NotSupported,
834 : "Cannot mix calls to CreateField() and "
835 : "CreateFieldFromArrowSchema()");
836 0 : return OGRERR_FAILURE;
837 : }
838 768 : m_poFeatureDefn->AddFieldDefn(poField);
839 768 : return OGRERR_NONE;
840 : }
841 :
842 : /************************************************************************/
843 : /* OGRLayer::CreateFieldFromArrowSchema() */
844 : /************************************************************************/
845 :
846 1081 : inline bool OGRArrowWriterLayer::CreateFieldFromArrowSchema(
847 : const struct ArrowSchema *schema, CSLConstList /*papszOptions*/)
848 : {
849 1081 : if (m_poSchema)
850 : {
851 0 : CPLError(CE_Failure, CPLE_NotSupported,
852 : "Cannot add field after a first feature has been written");
853 0 : return false;
854 : }
855 :
856 1081 : if (m_poFeatureDefn->GetFieldCount())
857 : {
858 0 : CPLError(CE_Failure, CPLE_NotSupported,
859 : "Cannot mix calls to CreateField() and "
860 : "CreateFieldFromArrowSchema()");
861 0 : return false;
862 : }
863 :
864 1081 : if (m_osFIDColumn == schema->name)
865 : {
866 0 : CPLError(CE_Failure, CPLE_AppDefined,
867 : "FID column has the same name as this field: %s",
868 0 : schema->name);
869 0 : return false;
870 : }
871 :
872 36422 : for (auto &apoField : m_apoFieldsFromArrowSchema)
873 : {
874 35341 : if (apoField->name() == schema->name)
875 : {
876 0 : CPLError(CE_Failure, CPLE_AppDefined,
877 0 : "Field of name %s already exists", schema->name);
878 0 : return false;
879 : }
880 : }
881 :
882 1081 : if (m_poFeatureDefn->GetGeomFieldIndex(schema->name) >= 0)
883 : {
884 0 : CPLError(CE_Failure, CPLE_AppDefined,
885 0 : "Geometry field of name %s already exists", schema->name);
886 0 : return false;
887 : }
888 :
889 : // ImportField() would release the schema, but we don't want that
890 : // So copy the structure content into a local variable, and override its
891 : // release callback to a no-op. This may be a bit fragile, but it doesn't
892 : // look like ImportField implementation tries to access the C ArrowSchema
893 : // after it has been called.
894 1081 : struct ArrowSchema lSchema = *schema;
895 1081 : const auto DummyFreeSchema = [](struct ArrowSchema *ptrSchema)
896 1081 : { ptrSchema->release = nullptr; };
897 1081 : lSchema.release = DummyFreeSchema;
898 2162 : auto result = arrow::ImportField(&lSchema);
899 1081 : CPLAssert(lSchema.release == nullptr);
900 1081 : if (!result.ok())
901 : {
902 0 : CPLError(CE_Failure, CPLE_AppDefined,
903 : "CreateFieldFromArrowSchema() failed: %s",
904 0 : result.status().message().c_str());
905 0 : return false;
906 : }
907 1081 : m_apoFieldsFromArrowSchema.emplace_back(std::move(*result));
908 1081 : return true;
909 : }
910 :
911 : /************************************************************************/
912 : /* GetPreciseArrowGeomEncoding() */
913 : /************************************************************************/
914 :
915 275 : inline OGRArrowGeomEncoding OGRArrowWriterLayer::GetPreciseArrowGeomEncoding(
916 : OGRArrowGeomEncoding eEncodingType, OGRwkbGeometryType eGType)
917 : {
918 275 : CPLAssert(eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC ||
919 : eEncodingType == OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC);
920 275 : const auto eFlatType = wkbFlatten(eGType);
921 275 : if (eFlatType == wkbPoint)
922 : {
923 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
924 48 : ? OGRArrowGeomEncoding::GEOARROW_FSL_POINT
925 48 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT;
926 : }
927 227 : else if (eFlatType == wkbLineString)
928 : {
929 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
930 39 : ? OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING
931 39 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING;
932 : }
933 188 : else if (eFlatType == wkbPolygon)
934 : {
935 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
936 55 : ? OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON
937 55 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON;
938 : }
939 133 : else if (eFlatType == wkbMultiPoint)
940 : {
941 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
942 39 : ? OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT
943 39 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT;
944 : }
945 94 : else if (eFlatType == wkbMultiLineString)
946 : {
947 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
948 39 : ? OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING
949 39 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING;
950 : }
951 55 : else if (eFlatType == wkbMultiPolygon)
952 : {
953 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
954 53 : ? OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON
955 53 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON;
956 : }
957 : else
958 : {
959 2 : CPLError(CE_Failure, CPLE_NotSupported,
960 : "GeoArrow encoding is currently not supported for %s",
961 : OGRGeometryTypeToName(eGType));
962 2 : return eEncodingType;
963 : }
964 : }
965 :
966 : /************************************************************************/
967 : /* GetGeomEncodingAsString() */
968 : /************************************************************************/
969 :
970 : inline const char *
971 747 : OGRArrowWriterLayer::GetGeomEncodingAsString(OGRArrowGeomEncoding eGeomEncoding,
972 : bool bForParquetGeo)
973 : {
974 747 : switch (eGeomEncoding)
975 : {
976 216 : case OGRArrowGeomEncoding::WKB:
977 216 : return bForParquetGeo ? "WKB" : "geoarrow.wkb";
978 111 : case OGRArrowGeomEncoding::WKT:
979 111 : return bForParquetGeo ? "WKT" : "geoarrow.wkt";
980 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
981 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
982 0 : CPLAssert(false);
983 : break;
984 19 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
985 19 : return "geoarrow.point";
986 19 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
987 19 : return "geoarrow.linestring";
988 21 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
989 21 : return "geoarrow.polygon";
990 19 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
991 19 : return "geoarrow.multipoint";
992 19 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
993 19 : return "geoarrow.multilinestring";
994 21 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
995 21 : return "geoarrow.multipolygon";
996 62 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
997 62 : return bForParquetGeo ? "point" : "geoarrow.point";
998 42 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
999 42 : return bForParquetGeo ? "linestring" : "geoarrow.linestring";
1000 60 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
1001 60 : return bForParquetGeo ? "polygon" : "geoarrow.polygon";
1002 42 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
1003 42 : return bForParquetGeo ? "multipoint" : "geoarrow.multipoint";
1004 42 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
1005 42 : return bForParquetGeo ? "multilinestring"
1006 42 : : "geoarrow.multilinestring";
1007 54 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
1008 54 : return bForParquetGeo ? "multipolygon" : "geoarrow.multipolygon";
1009 : }
1010 0 : return nullptr;
1011 : }
1012 :
1013 : /************************************************************************/
1014 : /* CreateGeomField() */
1015 : /************************************************************************/
1016 :
1017 : inline OGRErr
1018 27 : OGRArrowWriterLayer::CreateGeomField(const OGRGeomFieldDefn *poField,
1019 : int /* bApproxOK */)
1020 : {
1021 27 : if (m_poSchema)
1022 : {
1023 1 : CPLError(CE_Failure, CPLE_NotSupported,
1024 : "Cannot add field after a first feature has been written");
1025 1 : return OGRERR_FAILURE;
1026 : }
1027 26 : const auto eGType = poField->GetType();
1028 26 : if (!IsSupportedGeometryType(eGType))
1029 : {
1030 0 : return OGRERR_FAILURE;
1031 : }
1032 :
1033 26 : if (IsSRSRequired() && poField->GetSpatialRef() == nullptr)
1034 : {
1035 0 : CPLError(CE_Warning, CPLE_AppDefined,
1036 : "Geometry column should have an associated CRS");
1037 : }
1038 26 : auto eGeomEncoding = m_eGeomEncoding;
1039 26 : if (eGeomEncoding == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC ||
1040 26 : eGeomEncoding == OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC)
1041 : {
1042 0 : const auto eEncodingType = eGeomEncoding;
1043 0 : eGeomEncoding = GetPreciseArrowGeomEncoding(eEncodingType, eGType);
1044 0 : if (eGeomEncoding == eEncodingType)
1045 0 : return OGRERR_FAILURE;
1046 : }
1047 26 : m_aeGeomEncoding.push_back(eGeomEncoding);
1048 26 : m_poFeatureDefn->AddGeomFieldDefn(poField);
1049 26 : return OGRERR_NONE;
1050 : }
1051 :
1052 : /************************************************************************/
1053 : /* MakeGeoArrowBuilder() */
1054 : /************************************************************************/
1055 :
1056 : static std::shared_ptr<arrow::ArrayBuilder>
1057 134 : MakeGeoArrowBuilder(arrow::MemoryPool *poMemoryPool, int nDim, int nDepth)
1058 : {
1059 134 : if (nDepth == 0)
1060 104 : return std::make_shared<arrow::FixedSizeListBuilder>(
1061 104 : poMemoryPool, std::make_shared<arrow::DoubleBuilder>(poMemoryPool),
1062 52 : nDim);
1063 : else
1064 164 : return std::make_shared<arrow::ListBuilder>(
1065 246 : poMemoryPool, MakeGeoArrowBuilder(poMemoryPool, nDim, nDepth - 1));
1066 : }
1067 :
1068 : /************************************************************************/
1069 : /* MakeGeoArrowStructBuilder() */
1070 : /************************************************************************/
1071 :
1072 : static std::shared_ptr<arrow::ArrayBuilder>
1073 538 : MakeGeoArrowStructBuilder(arrow::MemoryPool *poMemoryPool, int nDim, int nDepth,
1074 : const std::shared_ptr<arrow::DataType> &eBaseType)
1075 : {
1076 538 : if (nDepth == 0)
1077 : {
1078 213 : std::vector<std::shared_ptr<arrow::ArrayBuilder>> builders;
1079 735 : for (int i = 0; i < nDim; ++i)
1080 : builders.emplace_back(
1081 522 : std::make_shared<arrow::DoubleBuilder>(poMemoryPool));
1082 426 : return std::make_shared<arrow::StructBuilder>(eBaseType, poMemoryPool,
1083 426 : std::move(builders));
1084 : }
1085 : else
1086 650 : return std::make_shared<arrow::ListBuilder>(
1087 650 : poMemoryPool, MakeGeoArrowStructBuilder(poMemoryPool, nDim,
1088 325 : nDepth - 1, eBaseType));
1089 : }
1090 :
1091 : /************************************************************************/
1092 : /* ClearArrayBuilers() */
1093 : /************************************************************************/
1094 :
1095 355 : inline void OGRArrowWriterLayer::ClearArrayBuilers()
1096 : {
1097 355 : m_apoBuilders.clear();
1098 355 : m_apoBuildersBBOXStruct.clear();
1099 355 : m_apoBuildersBBOXXMin.clear();
1100 355 : m_apoBuildersBBOXYMin.clear();
1101 355 : m_apoBuildersBBOXXMax.clear();
1102 355 : m_apoBuildersBBOXYMax.clear();
1103 355 : }
1104 :
1105 : /************************************************************************/
1106 : /* CreateArrayBuilders() */
1107 : /************************************************************************/
1108 :
1109 484 : inline void OGRArrowWriterLayer::CreateArrayBuilders()
1110 : {
1111 484 : m_apoBuilders.reserve(1 + m_poFeatureDefn->GetFieldCount() +
1112 484 : m_poFeatureDefn->GetGeomFieldCount());
1113 :
1114 484 : int nArrowIdx = 0;
1115 484 : if (!m_osFIDColumn.empty())
1116 : {
1117 51 : m_apoBuilders.emplace_back(std::make_shared<arrow::Int64Builder>());
1118 51 : nArrowIdx++;
1119 : }
1120 :
1121 1966 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i, ++nArrowIdx)
1122 : {
1123 1482 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
1124 1482 : std::shared_ptr<arrow::ArrayBuilder> builder;
1125 1482 : const auto eSubDT = poFieldDefn->GetSubType();
1126 1482 : switch (poFieldDefn->GetType())
1127 : {
1128 162 : case OFTInteger:
1129 162 : if (eSubDT == OFSTBoolean)
1130 : builder =
1131 12 : std::make_shared<arrow::BooleanBuilder>(m_poMemoryPool);
1132 150 : else if (eSubDT == OFSTInt16)
1133 : builder =
1134 12 : std::make_shared<arrow::Int16Builder>(m_poMemoryPool);
1135 : else
1136 : builder =
1137 138 : std::make_shared<arrow::Int32Builder>(m_poMemoryPool);
1138 162 : break;
1139 :
1140 99 : case OFTInteger64:
1141 99 : builder = std::make_shared<arrow::Int64Builder>(m_poMemoryPool);
1142 99 : break;
1143 :
1144 122 : case OFTReal:
1145 : {
1146 244 : const auto arrowType = m_poSchema->fields()[nArrowIdx]->type();
1147 122 : if (arrowType->id() == arrow::Type::DECIMAL128)
1148 27 : builder = std::make_shared<arrow::Decimal128Builder>(
1149 27 : arrowType, m_poMemoryPool);
1150 95 : else if (arrowType->id() == arrow::Type::DECIMAL256)
1151 0 : builder = std::make_shared<arrow::Decimal256Builder>(
1152 0 : arrowType, m_poMemoryPool);
1153 95 : else if (eSubDT == OFSTFloat32)
1154 : builder =
1155 21 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool);
1156 : else
1157 : builder =
1158 74 : std::make_shared<arrow::DoubleBuilder>(m_poMemoryPool);
1159 122 : break;
1160 : }
1161 :
1162 540 : case OFTString:
1163 : case OFTWideString:
1164 : builder =
1165 540 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool);
1166 540 : break;
1167 :
1168 43 : case OFTBinary:
1169 43 : if (poFieldDefn->GetWidth() != 0)
1170 24 : builder = std::make_shared<arrow::FixedSizeBinaryBuilder>(
1171 24 : arrow::fixed_size_binary(poFieldDefn->GetWidth()),
1172 24 : m_poMemoryPool);
1173 : else
1174 : builder =
1175 31 : std::make_shared<arrow::BinaryBuilder>(m_poMemoryPool);
1176 43 : break;
1177 :
1178 144 : case OFTIntegerList:
1179 : {
1180 144 : std::shared_ptr<arrow::ArrayBuilder> poBaseBuilder;
1181 144 : if (eSubDT == OFSTBoolean)
1182 : poBaseBuilder =
1183 24 : std::make_shared<arrow::BooleanBuilder>(m_poMemoryPool);
1184 120 : else if (eSubDT == OFSTInt16)
1185 : poBaseBuilder =
1186 0 : std::make_shared<arrow::Int16Builder>(m_poMemoryPool);
1187 : else
1188 : poBaseBuilder =
1189 120 : std::make_shared<arrow::Int32Builder>(m_poMemoryPool);
1190 288 : builder = std::make_shared<arrow::ListBuilder>(m_poMemoryPool,
1191 144 : poBaseBuilder);
1192 144 : break;
1193 : }
1194 :
1195 60 : case OFTInteger64List:
1196 60 : builder = std::make_shared<arrow::ListBuilder>(
1197 60 : m_poMemoryPool,
1198 180 : std::make_shared<arrow::Int64Builder>(m_poMemoryPool));
1199 :
1200 60 : break;
1201 :
1202 105 : case OFTRealList:
1203 105 : if (eSubDT == OFSTFloat32)
1204 33 : builder = std::make_shared<arrow::ListBuilder>(
1205 33 : m_poMemoryPool,
1206 99 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1207 : else
1208 72 : builder = std::make_shared<arrow::ListBuilder>(
1209 72 : m_poMemoryPool,
1210 216 : std::make_shared<arrow::DoubleBuilder>(m_poMemoryPool));
1211 105 : break;
1212 :
1213 36 : case OFTStringList:
1214 : case OFTWideStringList:
1215 36 : builder = std::make_shared<arrow::ListBuilder>(
1216 36 : m_poMemoryPool,
1217 108 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool));
1218 :
1219 36 : break;
1220 :
1221 47 : case OFTDate:
1222 : builder =
1223 47 : std::make_shared<arrow::Date32Builder>(m_poMemoryPool);
1224 47 : break;
1225 :
1226 24 : case OFTTime:
1227 48 : builder = std::make_shared<arrow::Time32Builder>(
1228 72 : arrow::time32(arrow::TimeUnit::MILLI), m_poMemoryPool);
1229 24 : break;
1230 :
1231 100 : case OFTDateTime:
1232 : {
1233 200 : const auto arrowType = m_poSchema->fields()[nArrowIdx]->type();
1234 100 : if (arrowType->id() == arrow::Type::STRUCT)
1235 : {
1236 4 : builder = std::make_shared<arrow::StructBuilder>(
1237 4 : arrowType, m_poMemoryPool,
1238 20 : std::vector<std::shared_ptr<arrow::ArrayBuilder>>{
1239 4 : std::make_shared<arrow::TimestampBuilder>(
1240 4 : arrow::timestamp(arrow::TimeUnit::MILLI),
1241 4 : m_poMemoryPool),
1242 4 : std::make_shared<arrow::Int16Builder>(
1243 16 : m_poMemoryPool)});
1244 : }
1245 : else
1246 : {
1247 192 : builder = std::make_shared<arrow::TimestampBuilder>(
1248 96 : arrow::timestamp(arrow::TimeUnit::MILLI),
1249 192 : m_poMemoryPool);
1250 : }
1251 100 : break;
1252 : }
1253 : }
1254 1482 : m_apoBuilders.emplace_back(builder);
1255 : }
1256 :
1257 959 : for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i, ++nArrowIdx)
1258 : {
1259 475 : std::shared_ptr<arrow::ArrayBuilder> builder;
1260 475 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
1261 475 : const auto eGType = poGeomFieldDefn->GetType();
1262 : const int nDim =
1263 475 : 2 + (OGR_GT_HasZ(eGType) ? 1 : 0) + (OGR_GT_HasM(eGType) ? 1 : 0);
1264 :
1265 475 : switch (m_aeGeomEncoding[i])
1266 : {
1267 157 : case OGRArrowGeomEncoding::WKB:
1268 : builder =
1269 157 : std::make_shared<arrow::BinaryBuilder>(m_poMemoryPool);
1270 157 : break;
1271 :
1272 53 : case OGRArrowGeomEncoding::WKT:
1273 : builder =
1274 53 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool);
1275 53 : break;
1276 :
1277 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
1278 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 0);
1279 8 : break;
1280 :
1281 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
1282 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 1);
1283 8 : break;
1284 :
1285 10 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
1286 10 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 2);
1287 10 : break;
1288 :
1289 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
1290 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 1);
1291 8 : break;
1292 :
1293 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
1294 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 2);
1295 8 : break;
1296 :
1297 10 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
1298 10 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 3);
1299 10 : break;
1300 :
1301 42 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
1302 84 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 0,
1303 84 : m_apoBaseStructGeomType[i]);
1304 42 : break;
1305 :
1306 29 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
1307 58 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 1,
1308 58 : m_apoBaseStructGeomType[i]);
1309 29 : break;
1310 :
1311 43 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
1312 86 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 2,
1313 86 : m_apoBaseStructGeomType[i]);
1314 43 : break;
1315 :
1316 29 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
1317 58 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 1,
1318 58 : m_apoBaseStructGeomType[i]);
1319 29 : break;
1320 :
1321 29 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
1322 58 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 2,
1323 58 : m_apoBaseStructGeomType[i]);
1324 29 : break;
1325 :
1326 41 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
1327 82 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 3,
1328 82 : m_apoBaseStructGeomType[i]);
1329 41 : break;
1330 :
1331 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
1332 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
1333 0 : CPLAssert(false);
1334 : break;
1335 : }
1336 :
1337 475 : m_apoBuilders.emplace_back(builder);
1338 :
1339 475 : if (m_bWriteBBoxStruct)
1340 : {
1341 : m_apoBuildersBBOXXMin.emplace_back(
1342 276 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1343 : m_apoBuildersBBOXYMin.emplace_back(
1344 276 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1345 : m_apoBuildersBBOXXMax.emplace_back(
1346 276 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1347 : m_apoBuildersBBOXYMax.emplace_back(
1348 276 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1349 : m_apoBuildersBBOXStruct.emplace_back(
1350 552 : std::make_shared<arrow::StructBuilder>(
1351 276 : m_apoFieldsBBOX[i]->type(), m_poMemoryPool,
1352 2484 : std::vector<std::shared_ptr<arrow::ArrayBuilder>>{
1353 276 : m_apoBuildersBBOXXMin.back(),
1354 276 : m_apoBuildersBBOXYMin.back(),
1355 276 : m_apoBuildersBBOXXMax.back(),
1356 1932 : m_apoBuildersBBOXYMax.back()}));
1357 : }
1358 : }
1359 484 : }
1360 :
1361 : /************************************************************************/
1362 : /* castToFloatDown() */
1363 : /************************************************************************/
1364 :
// Rounding approach borrowed from SQLite's R-Tree extension, cf
// https://github.com/sqlite/sqlite/blob/90e4a3b7fcdf63035d6f35eb44d11ff58ff4b068/ext/rtree/rtree.c#L2993C1-L2995C3
/*
** Scaling factors applied to a double before narrowing it to a float
** (double->float conversion), so that the result lands on the wanted side
** of the original value. 8388608 is 2^23.
*/
#define RNDTOWARDS (1.0 - 1.0 / 8388608.0) /* Round towards zero */
#define RNDAWAY (1.0 + 1.0 / 8388608.0)    /* Round away from zero */

/**
 * Convert a double to a float that is guaranteed to be <= d.
 *
 * Values exactly representable as float are returned unchanged. NaN and
 * infinities are propagated. Finite values above the float range yield the
 * largest finite float; values below it yield -infinity.
 *
 * @param d Value to convert.
 * @return A float f such that f <= d (for non-NaN d).
 */
static float castToFloatDown(double d)
{
    constexpr float fMax = std::numeric_limits<float>::max();
    constexpr float fInf = std::numeric_limits<float>::infinity();

    // Out-of-range values: avoid converting a value float cannot represent
    // and return the tightest valid lower bound directly.
    if (d > fMax && std::isfinite(d))
        return fMax;
    if (d < -fMax)
        return -fInf;

    float f = static_cast<float>(d);
    if (f > d)
    {
        // Conversion rounded upwards: nudge the input downwards first.
        f = static_cast<float>(d * (d < 0 ? RNDAWAY : RNDTOWARDS));
        // The relative nudge can be too small to change the result (e.g.
        // in the float subnormal range): step to the previous float.
        if (f > d)
            f = std::nextafter(f, -fInf);
    }
    return f;
}

/**
 * Convert a double to a float that is guaranteed to be >= d.
 *
 * Values exactly representable as float are returned unchanged. NaN and
 * infinities are propagated. Finite values below the float range yield the
 * lowest finite float; values above it yield +infinity.
 *
 * @param d Value to convert.
 * @return A float f such that f >= d (for non-NaN d).
 */
static float castToFloatUp(double d)
{
    constexpr float fMax = std::numeric_limits<float>::max();
    constexpr float fInf = std::numeric_limits<float>::infinity();

    // Out-of-range values: see castToFloatDown().
    if (d < -fMax && std::isfinite(d))
        return -fMax;
    if (d > fMax)
        return fInf;

    float f = static_cast<float>(d);
    if (f < d)
    {
        // Conversion rounded downwards: nudge the input upwards first.
        f = static_cast<float>(d * (d < 0 ? RNDTOWARDS : RNDAWAY));
        // Fall back to the next float if the nudge was not sufficient.
        if (f < d)
            f = std::nextafter(f, fInf);
    }
    return f;
}
1395 :
1396 : /************************************************************************/
1397 : /* GeoArrowLineBuilder() */
1398 : /************************************************************************/
1399 :
1400 : template <class PointBuilderType>
1401 676 : static OGRErr GeoArrowLineBuilder(const OGRLineString *poLS,
1402 : PointBuilderType *poPointBuilder,
1403 : arrow::DoubleBuilder *poXBuilder,
1404 : arrow::DoubleBuilder *poYBuilder,
1405 : arrow::DoubleBuilder *poZBuilder,
1406 : arrow::DoubleBuilder *poMBuilder)
1407 : {
1408 3076 : for (int j = 0; j < poLS->getNumPoints(); ++j)
1409 : {
1410 2400 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1411 2400 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poXBuilder->Append(poLS->getX(j)));
1412 2400 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poYBuilder->Append(poLS->getY(j)));
1413 2400 : if (poZBuilder)
1414 668 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poZBuilder->Append(poLS->getZ(j)));
1415 2400 : if (poMBuilder)
1416 220 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMBuilder->Append(poLS->getM(j)));
1417 : }
1418 676 : return OGRERR_NONE;
1419 : }
1420 :
1421 : /************************************************************************/
1422 : /* BuildGeometry() */
1423 : /************************************************************************/
1424 :
1425 3892 : inline OGRErr OGRArrowWriterLayer::BuildGeometry(OGRGeometry *poGeom,
1426 : int iGeomField,
1427 : arrow::ArrayBuilder *poBuilder)
1428 : {
1429 3892 : const auto eGType = poGeom ? poGeom->getGeometryType() : wkbNone;
1430 : const auto eColumnGType =
1431 3892 : m_poFeatureDefn->GetGeomFieldDefn(iGeomField)->GetType();
1432 3892 : const bool bHasZ = CPL_TO_BOOL(OGR_GT_HasZ(eColumnGType));
1433 3892 : const bool bHasM = CPL_TO_BOOL(OGR_GT_HasM(eColumnGType));
1434 3892 : const bool bIsEmpty = poGeom != nullptr && poGeom->IsEmpty();
1435 3892 : OGREnvelope3D oEnvelope;
1436 3892 : if (poGeom != nullptr && !bIsEmpty)
1437 : {
1438 2258 : if (poGeom->Is3D())
1439 : {
1440 322 : poGeom->getEnvelope(&oEnvelope);
1441 322 : m_aoEnvelopes[iGeomField].Merge(oEnvelope);
1442 : }
1443 : else
1444 : {
1445 1936 : poGeom->getEnvelope(static_cast<OGREnvelope *>(&oEnvelope));
1446 1936 : m_aoEnvelopes[iGeomField].Merge(oEnvelope);
1447 : }
1448 2258 : m_oSetWrittenGeometryTypes[iGeomField].insert(eGType);
1449 : }
1450 :
1451 3892 : if (m_bWriteBBoxStruct)
1452 : {
1453 3185 : if (poGeom && !bIsEmpty)
1454 : {
1455 1867 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1456 : m_apoBuildersBBOXXMin[iGeomField]->Append(
1457 : castToFloatDown(oEnvelope.MinX)));
1458 1867 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1459 : m_apoBuildersBBOXYMin[iGeomField]->Append(
1460 : castToFloatDown(oEnvelope.MinY)));
1461 1867 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1462 : m_apoBuildersBBOXXMax[iGeomField]->Append(
1463 : castToFloatUp(oEnvelope.MaxX)));
1464 1867 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1465 : m_apoBuildersBBOXYMax[iGeomField]->Append(
1466 : castToFloatUp(oEnvelope.MaxY)));
1467 1867 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1468 : m_apoBuildersBBOXStruct[iGeomField]->Append());
1469 : }
1470 : else
1471 : {
1472 1318 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1473 : m_apoBuildersBBOXStruct[iGeomField]->AppendNull());
1474 : }
1475 : }
1476 :
1477 3892 : if (poGeom == nullptr)
1478 : {
1479 4068 : if (m_aeGeomEncoding[iGeomField] ==
1480 1364 : OGRArrowGeomEncoding::GEOARROW_FSL_POINT &&
1481 1364 : GetDriverUCName() == "PARQUET")
1482 : {
1483 : // For some reason, Parquet doesn't support a NULL FixedSizeList
1484 : // on reading
1485 4 : auto poPointBuilder =
1486 : static_cast<arrow::FixedSizeListBuilder *>(poBuilder);
1487 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1488 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1489 4 : poPointBuilder->value_builder());
1490 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1491 : std::numeric_limits<double>::quiet_NaN()));
1492 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1493 : std::numeric_limits<double>::quiet_NaN()));
1494 4 : if (bHasZ)
1495 2 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1496 : std::numeric_limits<double>::quiet_NaN()));
1497 4 : if (bHasM)
1498 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1499 : std::numeric_limits<double>::quiet_NaN()));
1500 : }
1501 : else
1502 : {
1503 1352 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1504 : }
1505 :
1506 1356 : return OGRERR_NONE;
1507 : }
1508 :
1509 : // The following checks are only valid for GeoArrow encoding
1510 3714 : if (m_aeGeomEncoding[iGeomField] != OGRArrowGeomEncoding::WKB &&
1511 1178 : m_aeGeomEncoding[iGeomField] != OGRArrowGeomEncoding::WKT)
1512 : {
1513 1114 : if ((!bIsEmpty && eGType != eColumnGType) ||
1514 244 : (bIsEmpty && wkbFlatten(eGType) != wkbFlatten(eColumnGType)))
1515 : {
1516 6 : CPLError(CE_Warning, CPLE_AppDefined,
1517 : "Geometry of type %s found, whereas %s is expected. "
1518 : "Writing null geometry",
1519 : OGRGeometryTypeToName(eGType),
1520 : OGRGeometryTypeToName(eColumnGType));
1521 6 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1522 :
1523 6 : return OGRERR_NONE;
1524 : }
1525 : }
1526 :
1527 2530 : switch (m_aeGeomEncoding[iGeomField])
1528 : {
1529 1358 : case OGRArrowGeomEncoding::WKB:
1530 : {
1531 0 : std::unique_ptr<OGRGeometry> poGeomModified;
1532 1358 : if (OGR_GT_HasM(eGType) && !OGR_GT_HasM(eColumnGType))
1533 : {
1534 : static bool bHasWarned = false;
1535 0 : if (!bHasWarned)
1536 : {
1537 0 : CPLError(CE_Warning, CPLE_AppDefined,
1538 : "Removing M component from geometry");
1539 0 : bHasWarned = true;
1540 : }
1541 0 : poGeomModified.reset(poGeom->clone());
1542 0 : poGeomModified->setMeasured(false);
1543 0 : poGeom = poGeomModified.get();
1544 : }
1545 1358 : FixupGeometryBeforeWriting(poGeom);
1546 1358 : const auto nSize = poGeom->WkbSize();
1547 1358 : if (nSize < INT_MAX)
1548 : {
1549 1358 : m_abyBuffer.resize(nSize);
1550 1358 : poGeom->exportToWkb(wkbNDR, &m_abyBuffer[0], wkbVariantIso);
1551 1358 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1552 : static_cast<arrow::BinaryBuilder *>(poBuilder)->Append(
1553 : m_abyBuffer.data(),
1554 : static_cast<int>(m_abyBuffer.size())));
1555 : }
1556 : else
1557 : {
1558 0 : CPLError(CE_Warning, CPLE_AppDefined,
1559 : "Too big geometry. "
1560 : "Writing null geometry");
1561 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1562 : }
1563 1358 : break;
1564 : }
1565 :
1566 308 : case OGRArrowGeomEncoding::WKT:
1567 : {
1568 308 : OGRWktOptions options;
1569 308 : options.variant = wkbVariantIso;
1570 308 : if (m_nWKTCoordinatePrecision >= 0)
1571 : {
1572 0 : options.format = OGRWktFormat::F;
1573 0 : options.xyPrecision = m_nWKTCoordinatePrecision;
1574 0 : options.zPrecision = m_nWKTCoordinatePrecision;
1575 0 : options.mPrecision = m_nWKTCoordinatePrecision;
1576 : }
1577 308 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1578 : static_cast<arrow::StringBuilder *>(poBuilder)->Append(
1579 : poGeom->exportToWkt(options)));
1580 308 : break;
1581 : }
1582 :
1583 20 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
1584 : {
1585 20 : const auto poPoint = poGeom->toPoint();
1586 20 : auto poPointBuilder =
1587 : static_cast<arrow::FixedSizeListBuilder *>(poBuilder);
1588 20 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1589 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1590 20 : poPointBuilder->value_builder());
1591 20 : if (bIsEmpty)
1592 : {
1593 8 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1594 : std::numeric_limits<double>::quiet_NaN()));
1595 8 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1596 : std::numeric_limits<double>::quiet_NaN()));
1597 8 : if (bHasZ)
1598 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1599 : std::numeric_limits<double>::quiet_NaN()));
1600 8 : if (bHasM)
1601 2 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1602 : std::numeric_limits<double>::quiet_NaN()));
1603 : }
1604 : else
1605 : {
1606 12 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1607 : poValueBuilder->Append(poPoint->getX()));
1608 12 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1609 : poValueBuilder->Append(poPoint->getY()));
1610 12 : if (bHasZ)
1611 6 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1612 : poValueBuilder->Append(poPoint->getZ()));
1613 12 : if (bHasM)
1614 2 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1615 : poValueBuilder->Append(poPoint->getM()));
1616 : }
1617 20 : break;
1618 : }
1619 :
1620 : #define GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder) \
1621 : auto poXBuilder = \
1622 : static_cast<arrow::DoubleBuilder *>(poPointBuilder->field_builder(0)); \
1623 : auto poYBuilder = \
1624 : static_cast<arrow::DoubleBuilder *>(poPointBuilder->field_builder(1)); \
1625 : int iSubField = 2; \
1626 : arrow::DoubleBuilder *poZBuilder = nullptr; \
1627 : if (bHasZ) \
1628 : { \
1629 : poZBuilder = static_cast<arrow::DoubleBuilder *>( \
1630 : poPointBuilder->field_builder(iSubField)); \
1631 : ++iSubField; \
1632 : } \
1633 : arrow::DoubleBuilder *poMBuilder = nullptr; \
1634 : if (bHasM) \
1635 : { \
1636 : poMBuilder = static_cast<arrow::DoubleBuilder *>( \
1637 : poPointBuilder->field_builder(iSubField)); \
1638 : } \
1639 : do \
1640 : { \
1641 : } while (0)
1642 :
1643 109 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
1644 : {
1645 109 : const auto poPoint = poGeom->toPoint();
1646 109 : auto poPointBuilder =
1647 : static_cast<arrow::StructBuilder *>(poBuilder);
1648 109 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1649 109 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1650 :
1651 109 : if (bIsEmpty)
1652 : {
1653 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poXBuilder->Append(
1654 : std::numeric_limits<double>::quiet_NaN()));
1655 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poYBuilder->Append(
1656 : std::numeric_limits<double>::quiet_NaN()));
1657 : }
1658 : else
1659 : {
1660 81 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1661 : poXBuilder->Append(poPoint->getX()));
1662 81 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1663 : poYBuilder->Append(poPoint->getY()));
1664 : }
1665 109 : if (poZBuilder)
1666 : {
1667 40 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poZBuilder->Append(
1668 : bIsEmpty ? std::numeric_limits<double>::quiet_NaN()
1669 : : poPoint->getZ()));
1670 : }
1671 109 : if (poMBuilder)
1672 : {
1673 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMBuilder->Append(
1674 : bIsEmpty ? std::numeric_limits<double>::quiet_NaN()
1675 : : poPoint->getM()));
1676 : }
1677 109 : break;
1678 : }
1679 :
1680 20 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
1681 : {
1682 20 : const auto poLS = poGeom->toLineString();
1683 20 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1684 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1685 20 : poListBuilder->value_builder());
1686 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1687 20 : poPointBuilder->value_builder());
1688 :
1689 20 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1690 20 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1691 : poLS, poPointBuilder, poValueBuilder, poValueBuilder,
1692 : bHasZ ? poValueBuilder : nullptr,
1693 : bHasM ? poValueBuilder : nullptr));
1694 20 : break;
1695 : }
1696 :
1697 81 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
1698 : {
1699 81 : const auto poLS = poGeom->toLineString();
1700 81 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1701 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1702 81 : poListBuilder->value_builder());
1703 81 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1704 :
1705 81 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1706 81 : OGR_ARROW_PROPAGATE_OGRERR(
1707 : GeoArrowLineBuilder(poLS, poPointBuilder, poXBuilder,
1708 : poYBuilder, poZBuilder, poMBuilder));
1709 81 : break;
1710 : }
1711 :
1712 32 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
1713 : {
1714 32 : const auto poPolygon = poGeom->toPolygon();
1715 32 : auto poPolygonBuilder =
1716 : static_cast<arrow::ListBuilder *>(poBuilder);
1717 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1718 32 : poPolygonBuilder->value_builder());
1719 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1720 32 : poRingBuilder->value_builder());
1721 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1722 32 : poPointBuilder->value_builder());
1723 32 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolygonBuilder->Append());
1724 62 : for (const auto *poRing : *poPolygon)
1725 : {
1726 30 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1727 30 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1728 : poRing, poPointBuilder, poValueBuilder, poValueBuilder,
1729 : bHasZ ? poValueBuilder : nullptr,
1730 : bHasM ? poValueBuilder : nullptr));
1731 : }
1732 32 : break;
1733 : }
1734 :
1735 133 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
1736 : {
1737 133 : const auto poPolygon = poGeom->toPolygon();
1738 133 : auto poPolygonBuilder =
1739 : static_cast<arrow::ListBuilder *>(poBuilder);
1740 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1741 133 : poPolygonBuilder->value_builder());
1742 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1743 133 : poRingBuilder->value_builder());
1744 133 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1745 :
1746 133 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolygonBuilder->Append());
1747 254 : for (const auto *poRing : *poPolygon)
1748 : {
1749 121 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1750 121 : OGR_ARROW_PROPAGATE_OGRERR(
1751 : GeoArrowLineBuilder(poRing, poPointBuilder, poXBuilder,
1752 : poYBuilder, poZBuilder, poMBuilder));
1753 : }
1754 133 : break;
1755 : }
1756 :
1757 32 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
1758 : {
1759 32 : const auto poMultiPoint = poGeom->toMultiPoint();
1760 32 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1761 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1762 32 : poListBuilder->value_builder());
1763 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1764 32 : poPointBuilder->value_builder());
1765 32 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1766 88 : for (const auto *poPoint : *poMultiPoint)
1767 : {
1768 56 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1769 56 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1770 : poValueBuilder->Append(poPoint->getX()));
1771 56 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1772 : poValueBuilder->Append(poPoint->getY()));
1773 56 : if (bHasZ)
1774 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1775 : poValueBuilder->Append(poPoint->getZ()));
1776 56 : if (bHasM)
1777 18 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1778 : poValueBuilder->Append(poPoint->getM()));
1779 : }
1780 32 : break;
1781 : }
1782 :
1783 113 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
1784 : {
1785 113 : const auto poMultiPoint = poGeom->toMultiPoint();
1786 113 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1787 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1788 113 : poListBuilder->value_builder());
1789 113 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1790 :
1791 113 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1792 270 : for (const auto *poPoint : *poMultiPoint)
1793 : {
1794 157 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1795 157 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1796 : poXBuilder->Append(poPoint->getX()));
1797 157 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1798 : poYBuilder->Append(poPoint->getY()));
1799 157 : if (poZBuilder)
1800 78 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1801 : poZBuilder->Append(poPoint->getZ()));
1802 157 : if (poMBuilder)
1803 18 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1804 : poMBuilder->Append(poPoint->getM()));
1805 : }
1806 113 : break;
1807 : }
1808 :
1809 28 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
1810 : {
1811 28 : const auto poMLS = poGeom->toMultiLineString();
1812 28 : auto poMLSBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1813 : auto poLSBuilder = static_cast<arrow::ListBuilder *>(
1814 28 : poMLSBuilder->value_builder());
1815 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1816 28 : poLSBuilder->value_builder());
1817 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1818 28 : poPointBuilder->value_builder());
1819 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMLSBuilder->Append());
1820 60 : for (const auto *poLS : *poMLS)
1821 : {
1822 32 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poLSBuilder->Append());
1823 32 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1824 : poLS, poPointBuilder, poValueBuilder, poValueBuilder,
1825 : bHasZ ? poValueBuilder : nullptr,
1826 : bHasM ? poValueBuilder : nullptr));
1827 : }
1828 28 : break;
1829 : }
1830 :
1831 109 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
1832 : {
1833 109 : const auto poMLS = poGeom->toMultiLineString();
1834 109 : auto poMLSBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1835 : auto poLSBuilder = static_cast<arrow::ListBuilder *>(
1836 109 : poMLSBuilder->value_builder());
1837 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1838 109 : poLSBuilder->value_builder());
1839 109 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1840 :
1841 109 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMLSBuilder->Append());
1842 242 : for (const auto *poLS : *poMLS)
1843 : {
1844 133 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poLSBuilder->Append());
1845 133 : OGR_ARROW_PROPAGATE_OGRERR(
1846 : GeoArrowLineBuilder(poLS, poPointBuilder, poXBuilder,
1847 : poYBuilder, poZBuilder, poMBuilder));
1848 : }
1849 109 : break;
1850 : }
1851 :
1852 38 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
1853 : {
1854 38 : const auto poMPoly = poGeom->toMultiPolygon();
1855 38 : auto poMPolyBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1856 : auto poPolyBuilder = static_cast<arrow::ListBuilder *>(
1857 38 : poMPolyBuilder->value_builder());
1858 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1859 38 : poPolyBuilder->value_builder());
1860 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1861 38 : poRingBuilder->value_builder());
1862 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1863 38 : poPointBuilder->value_builder());
1864 38 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMPolyBuilder->Append());
1865 82 : for (const auto *poPolygon : *poMPoly)
1866 : {
1867 44 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolyBuilder->Append());
1868 98 : for (const auto *poRing : *poPolygon)
1869 : {
1870 54 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1871 54 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1872 : poRing, poPointBuilder, poValueBuilder, poValueBuilder,
1873 : bHasZ ? poValueBuilder : nullptr,
1874 : bHasM ? poValueBuilder : nullptr));
1875 : }
1876 : }
1877 38 : break;
1878 : }
1879 :
1880 149 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
1881 : {
1882 149 : const auto poMPoly = poGeom->toMultiPolygon();
1883 149 : auto poMPolyBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1884 : auto poPolyBuilder = static_cast<arrow::ListBuilder *>(
1885 149 : poMPolyBuilder->value_builder());
1886 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1887 149 : poPolyBuilder->value_builder());
1888 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1889 149 : poRingBuilder->value_builder());
1890 149 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1891 :
1892 149 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMPolyBuilder->Append());
1893 314 : for (const auto *poPolygon : *poMPoly)
1894 : {
1895 165 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolyBuilder->Append());
1896 370 : for (const auto *poRing : *poPolygon)
1897 : {
1898 205 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1899 205 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1900 : poRing, poPointBuilder, poXBuilder, poYBuilder,
1901 : poZBuilder, poMBuilder));
1902 : }
1903 : }
1904 149 : break;
1905 : }
1906 :
1907 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
1908 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
1909 : {
1910 0 : CPLAssert(false);
1911 : break;
1912 : }
1913 : }
1914 :
1915 2530 : return OGRERR_NONE;
1916 : }
1917 :
1918 : /************************************************************************/
1919 : /* ICreateFeature() */
1920 : /************************************************************************/
1921 :
1922 3441 : inline OGRErr OGRArrowWriterLayer::ICreateFeature(OGRFeature *poFeature)
1923 : {
1924 3441 : if (m_poSchema == nullptr)
1925 : {
1926 315 : CreateSchema();
1927 : }
1928 :
1929 3441 : if (m_apoBuilders.empty())
1930 : {
1931 353 : if (!m_apoFieldsFromArrowSchema.empty())
1932 : {
1933 0 : CPLError(CE_Failure, CPLE_NotSupported,
1934 : "ICreateFeature() cannot be used after "
1935 : "CreateFieldFromArrowSchema()");
1936 0 : return OGRERR_FAILURE;
1937 : }
1938 353 : CreateArrayBuilders();
1939 : }
1940 :
1941 : // First pass to check not-null constraints as Arrow doesn't seem
1942 : // to do that on the writing side. But such files can't be read.
1943 3441 : const int nFieldCount = m_poFeatureDefn->GetFieldCount();
1944 9048 : for (int i = 0; i < nFieldCount; ++i)
1945 : {
1946 5608 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
1947 5610 : if (!poFieldDefn->IsNullable() &&
1948 2 : !poFeature->IsFieldSetAndNotNullUnsafe(i))
1949 : {
1950 1 : CPLError(CE_Failure, CPLE_AppDefined,
1951 : "Null value found in non-nullable field %s",
1952 : poFieldDefn->GetNameRef());
1953 1 : return OGRERR_FAILURE;
1954 : }
1955 : }
1956 :
1957 3440 : const int nGeomFieldCount = m_poFeatureDefn->GetGeomFieldCount();
1958 6983 : for (int i = 0; i < nGeomFieldCount; ++i)
1959 : {
1960 3543 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
1961 3575 : if (!poGeomFieldDefn->IsNullable() &&
1962 32 : poFeature->GetGeomFieldRef(i) == nullptr)
1963 : {
1964 0 : CPLError(CE_Failure, CPLE_AppDefined,
1965 : "Null value found in non-nullable geometry field %s",
1966 : poGeomFieldDefn->GetNameRef());
1967 0 : return OGRERR_FAILURE;
1968 : }
1969 : }
1970 :
1971 : // Write FID, if FID column present
1972 3440 : int nArrowIdx = 0;
1973 3440 : if (!m_osFIDColumn.empty())
1974 : {
1975 2261 : int64_t nFID = poFeature->GetFID();
1976 2261 : if (nFID == OGRNullFID)
1977 : {
1978 37 : nFID = m_nFeatureCount;
1979 37 : poFeature->SetFID(nFID);
1980 : }
1981 : auto poBuilder =
1982 2261 : static_cast<arrow::Int64Builder *>(m_apoBuilders[0].get());
1983 2261 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->Append(nFID));
1984 2261 : nArrowIdx++;
1985 : }
1986 :
1987 : // Write attributes
1988 9047 : for (int i = 0; i < nFieldCount; ++i, ++nArrowIdx)
1989 : {
1990 5607 : auto poBuilder = m_apoBuilders[nArrowIdx].get();
1991 5607 : if (!poFeature->IsFieldSetAndNotNullUnsafe(i))
1992 : {
1993 1248 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1994 1248 : continue;
1995 : }
1996 :
1997 4359 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
1998 4359 : const auto eSubDT = poFieldDefn->GetSubType();
1999 4359 : switch (poFieldDefn->GetType())
2000 : {
2001 2485 : case OFTInteger:
2002 2485 : if (eSubDT == OFSTBoolean)
2003 16 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2004 : static_cast<arrow::BooleanBuilder *>(poBuilder)->Append(
2005 : poFeature->GetFieldAsIntegerUnsafe(i) != 0));
2006 2469 : else if (eSubDT == OFSTInt16)
2007 16 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2008 : static_cast<arrow::Int16Builder *>(poBuilder)->Append(
2009 : static_cast<int16_t>(
2010 : poFeature->GetFieldAsIntegerUnsafe(i))));
2011 : else
2012 2453 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2013 : static_cast<arrow::Int32Builder *>(poBuilder)->Append(
2014 : poFeature->GetFieldAsIntegerUnsafe(i)));
2015 2485 : break;
2016 :
2017 172 : case OFTInteger64:
2018 172 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2019 : static_cast<arrow::Int64Builder *>(poBuilder)->Append(
2020 : static_cast<int64_t>(
2021 : poFeature->GetFieldAsInteger64Unsafe(i))));
2022 172 : break;
2023 :
2024 236 : case OFTReal:
2025 : {
2026 236 : const auto arrowType = m_poSchema->fields()[nArrowIdx]->type();
2027 236 : const double dfVal = poFeature->GetFieldAsDoubleUnsafe(i);
2028 236 : if (arrowType->id() == arrow::Type::DECIMAL128)
2029 : {
2030 : auto res = arrow::Decimal128::FromReal(
2031 : dfVal, poFieldDefn->GetWidth(),
2032 62 : poFieldDefn->GetPrecision());
2033 62 : if (res.ok())
2034 : {
2035 62 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2036 : static_cast<arrow::Decimal128Builder *>(poBuilder)
2037 : ->Append(*res));
2038 : }
2039 : else
2040 : {
2041 0 : CPLError(CE_Warning, CPLE_AppDefined,
2042 : "Cannot parse %.18g as a %d.%d decimal", dfVal,
2043 : poFieldDefn->GetWidth(),
2044 : poFieldDefn->GetPrecision());
2045 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
2046 : }
2047 : }
2048 174 : else if (arrowType->id() == arrow::Type::DECIMAL256)
2049 : {
2050 : auto res = arrow::Decimal256::FromReal(
2051 : dfVal, poFieldDefn->GetWidth(),
2052 0 : poFieldDefn->GetPrecision());
2053 0 : if (res.ok())
2054 : {
2055 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2056 : static_cast<arrow::Decimal256Builder *>(poBuilder)
2057 : ->Append(*res));
2058 : }
2059 : else
2060 : {
2061 0 : CPLError(CE_Warning, CPLE_AppDefined,
2062 : "Cannot parse %.18g as a %d.%d decimal", dfVal,
2063 : poFieldDefn->GetWidth(),
2064 : poFieldDefn->GetPrecision());
2065 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
2066 : }
2067 : }
2068 174 : else if (eSubDT == OFSTFloat32)
2069 : {
2070 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2071 : static_cast<arrow::FloatBuilder *>(poBuilder)->Append(
2072 : static_cast<float>(dfVal)));
2073 : }
2074 : else
2075 : {
2076 146 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2077 : static_cast<arrow::DoubleBuilder *>(poBuilder)->Append(
2078 : dfVal));
2079 : }
2080 236 : break;
2081 : }
2082 :
2083 545 : case OFTString:
2084 : case OFTWideString:
2085 545 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2086 : static_cast<arrow::StringBuilder *>(poBuilder)->Append(
2087 : poFeature->GetFieldAsStringUnsafe(i)));
2088 545 : break;
2089 :
2090 62 : case OFTBinary:
2091 : {
2092 62 : int nSize = 0;
2093 62 : const auto pData = poFeature->GetFieldAsBinary(i, &nSize);
2094 62 : if (poFieldDefn->GetWidth() != 0)
2095 : {
2096 20 : if (poFieldDefn->GetWidth() != nSize)
2097 : {
2098 0 : CPLError(
2099 : CE_Warning, CPLE_AppDefined,
2100 : "Cannot write field %s. Got %d bytes, expected %d",
2101 : poFieldDefn->GetNameRef(), nSize,
2102 : poFieldDefn->GetWidth());
2103 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
2104 : }
2105 : else
2106 : {
2107 20 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2108 : static_cast<arrow::FixedSizeBinaryBuilder *>(
2109 : poBuilder)
2110 : ->Append(pData));
2111 : }
2112 : }
2113 : else
2114 42 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2115 : static_cast<arrow::BinaryBuilder *>(poBuilder)->Append(
2116 : pData, nSize));
2117 62 : break;
2118 : }
2119 :
2120 216 : case OFTIntegerList:
2121 : {
2122 216 : auto poListBuilder =
2123 : static_cast<arrow::ListBuilder *>(poBuilder);
2124 216 : if (eSubDT == OFSTBoolean)
2125 : {
2126 36 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
2127 : auto poValueBuilder = static_cast<arrow::BooleanBuilder *>(
2128 36 : poListBuilder->value_builder());
2129 36 : int nValues = 0;
2130 : const auto panValues =
2131 36 : poFeature->GetFieldAsIntegerList(i, &nValues);
2132 108 : for (int j = 0; j < nValues; ++j)
2133 72 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2134 : poValueBuilder->Append(panValues[j] != 0));
2135 : }
2136 180 : else if (eSubDT == OFSTInt16)
2137 : {
2138 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
2139 : auto poValueBuilder = static_cast<arrow::Int16Builder *>(
2140 0 : poListBuilder->value_builder());
2141 0 : int nValues = 0;
2142 : const auto panValues =
2143 0 : poFeature->GetFieldAsIntegerList(i, &nValues);
2144 0 : for (int j = 0; j < nValues; ++j)
2145 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
2146 : static_cast<int16_t>(panValues[j])));
2147 : }
2148 : else
2149 : {
2150 180 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
2151 : auto poValueBuilder = static_cast<arrow::Int32Builder *>(
2152 180 : poListBuilder->value_builder());
2153 180 : int nValues = 0;
2154 : const auto panValues =
2155 180 : poFeature->GetFieldAsIntegerList(i, &nValues);
2156 540 : for (int j = 0; j < nValues; ++j)
2157 360 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2158 : poValueBuilder->Append(panValues[j]));
2159 : }
2160 216 : break;
2161 : }
2162 :
2163 92 : case OFTInteger64List:
2164 : {
2165 92 : auto poListBuilder =
2166 : static_cast<arrow::ListBuilder *>(poBuilder);
2167 92 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
2168 : auto poValueBuilder = static_cast<arrow::Int64Builder *>(
2169 92 : poListBuilder->value_builder());
2170 92 : int nValues = 0;
2171 : const auto panValues =
2172 92 : poFeature->GetFieldAsInteger64List(i, &nValues);
2173 292 : for (int j = 0; j < nValues; ++j)
2174 200 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
2175 : static_cast<int64_t>(panValues[j])));
2176 92 : break;
2177 : }
2178 :
2179 152 : case OFTRealList:
2180 : {
2181 152 : auto poListBuilder =
2182 : static_cast<arrow::ListBuilder *>(poBuilder);
2183 152 : if (eSubDT == OFSTFloat32)
2184 : {
2185 48 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
2186 : auto poValueBuilder = static_cast<arrow::FloatBuilder *>(
2187 48 : poListBuilder->value_builder());
2188 48 : int nValues = 0;
2189 : const auto padfValues =
2190 48 : poFeature->GetFieldAsDoubleList(i, &nValues);
2191 144 : for (int j = 0; j < nValues; ++j)
2192 96 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
2193 : static_cast<float>(padfValues[j])));
2194 : }
2195 : else
2196 : {
2197 104 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
2198 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
2199 104 : poListBuilder->value_builder());
2200 104 : int nValues = 0;
2201 : const auto padfValues =
2202 104 : poFeature->GetFieldAsDoubleList(i, &nValues);
2203 280 : for (int j = 0; j < nValues; ++j)
2204 176 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2205 : poValueBuilder->Append(padfValues[j]));
2206 : }
2207 152 : break;
2208 : }
2209 :
2210 52 : case OFTStringList:
2211 : case OFTWideStringList:
2212 : {
2213 52 : auto poListBuilder =
2214 : static_cast<arrow::ListBuilder *>(poBuilder);
2215 52 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
2216 : auto poValueBuilder = static_cast<arrow::StringBuilder *>(
2217 52 : poListBuilder->value_builder());
2218 52 : const auto papszValues = poFeature->GetFieldAsStringList(i);
2219 132 : for (int j = 0; papszValues && papszValues[j]; ++j)
2220 80 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2221 : poValueBuilder->Append(papszValues[j]));
2222 52 : break;
2223 : }
2224 :
2225 109 : case OFTDate:
2226 : {
2227 : int nYear, nMonth, nDay, nHour, nMinute;
2228 : float fSec;
2229 : int nTZFlag;
2230 109 : poFeature->GetFieldAsDateTime(i, &nYear, &nMonth, &nDay, &nHour,
2231 : &nMinute, &fSec, &nTZFlag);
2232 : struct tm brokenDown;
2233 109 : memset(&brokenDown, 0, sizeof(brokenDown));
2234 109 : brokenDown.tm_year = nYear - 1900;
2235 109 : brokenDown.tm_mon = nMonth - 1;
2236 109 : brokenDown.tm_mday = nDay;
2237 109 : GIntBig nVal = CPLYMDHMSToUnixTime(&brokenDown);
2238 109 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2239 : static_cast<arrow::Date32Builder *>(poBuilder)->Append(
2240 : static_cast<int>(nVal / 86400)));
2241 109 : break;
2242 : }
2243 :
2244 36 : case OFTTime:
2245 : {
2246 : int nYear, nMonth, nDay, nHour, nMinute;
2247 : float fSec;
2248 : int nTZFlag;
2249 36 : poFeature->GetFieldAsDateTime(i, &nYear, &nMonth, &nDay, &nHour,
2250 : &nMinute, &fSec, &nTZFlag);
2251 36 : int nVal = nHour * 3600 + nMinute * 60;
2252 36 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2253 : static_cast<arrow::Time32Builder *>(poBuilder)->Append(
2254 : static_cast<int>(
2255 : (static_cast<double>(nVal) + fSec) * 1000 + 0.5)));
2256 36 : break;
2257 : }
2258 :
2259 202 : case OFTDateTime:
2260 : {
2261 : int nYear, nMonth, nDay, nHour, nMinute;
2262 : float fSec;
2263 : int nTZFlag;
2264 202 : poFeature->GetFieldAsDateTime(i, &nYear, &nMonth, &nDay, &nHour,
2265 : &nMinute, &fSec, &nTZFlag);
2266 : struct tm brokenDown;
2267 202 : memset(&brokenDown, 0, sizeof(brokenDown));
2268 202 : brokenDown.tm_year = nYear - 1900;
2269 202 : brokenDown.tm_mon = nMonth - 1;
2270 202 : brokenDown.tm_mday = nDay;
2271 202 : brokenDown.tm_hour = nHour;
2272 202 : brokenDown.tm_min = nMinute;
2273 202 : brokenDown.tm_sec = 0;
2274 202 : GIntBig nVal = CPLYMDHMSToUnixTime(&brokenDown);
2275 332 : if (!IsFileWriterCreated() &&
2276 130 : m_anTZFlag[i] != OGR_TZFLAG_UNKNOWN)
2277 : {
2278 72 : if (m_anTZFlag[i] == TZFLAG_UNINITIALIZED)
2279 35 : m_anTZFlag[i] = nTZFlag;
2280 37 : else if (m_anTZFlag[i] != nTZFlag)
2281 : {
2282 26 : if (m_anTZFlag[i] >= OGR_TZFLAG_MIXED_TZ &&
2283 13 : nTZFlag >= OGR_TZFLAG_MIXED_TZ)
2284 : {
2285 10 : m_anTZFlag[i] =
2286 : OGR_TZFLAG_MIXED_TZ; // harmonize on UTC ultimately
2287 : }
2288 : else
2289 : {
2290 3 : CPLError(CE_Warning, CPLE_AppDefined,
2291 : "Field %s contains a mix of "
2292 : "timezone-aware and local/without "
2293 : "timezone values.",
2294 : poFieldDefn->GetNameRef());
2295 3 : m_anTZFlag[i] = OGR_TZFLAG_UNKNOWN;
2296 : }
2297 : }
2298 : }
2299 202 : if (nTZFlag > OGR_TZFLAG_MIXED_TZ)
2300 : {
2301 70 : const int nOffsetSec = (nTZFlag - OGR_TZFLAG_UTC) * 15 * 60;
2302 70 : nVal -= nOffsetSec;
2303 : }
2304 202 : const int64_t nTimestamp = static_cast<int64_t>(
2305 202 : (static_cast<double>(nVal) + fSec) * 1000 + 0.5);
2306 : auto structBuilder =
2307 202 : dynamic_cast<arrow::StructBuilder *>(poBuilder);
2308 202 : if (structBuilder)
2309 : {
2310 10 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2311 : static_cast<arrow::TimestampBuilder *>(
2312 : structBuilder->field_builder(0))
2313 : ->Append(nTimestamp));
2314 10 : const int16_t nUTCOffsetMin =
2315 10 : static_cast<int16_t>((nTZFlag - OGR_TZFLAG_UTC) * 15);
2316 10 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2317 : static_cast<arrow::Int16Builder *>(
2318 : structBuilder->field_builder(1))
2319 : ->Append(nUTCOffsetMin));
2320 10 : OGR_ARROW_RETURN_OGRERR_NOT_OK(structBuilder->Append());
2321 : }
2322 : else
2323 : {
2324 192 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2325 : static_cast<arrow::TimestampBuilder *>(poBuilder)
2326 : ->Append(nTimestamp));
2327 : }
2328 202 : break;
2329 : }
2330 : }
2331 : }
2332 :
2333 : // Write geometries
2334 6983 : for (int i = 0; i < nGeomFieldCount; ++i, ++nArrowIdx)
2335 : {
2336 3543 : auto poBuilder = m_apoBuilders[nArrowIdx].get();
2337 3543 : OGRGeometry *poGeom = poFeature->GetGeomFieldRef(i);
2338 3543 : if (BuildGeometry(poGeom, i, poBuilder) != OGRERR_NONE)
2339 0 : return OGRERR_FAILURE;
2340 : }
2341 :
2342 3440 : m_nFeatureCount++;
2343 :
2344 : // Flush the current row group if reaching the limit of rows per group.
2345 3440 : if (!m_apoBuilders.empty() && m_apoBuilders[0]->length() == m_nRowGroupSize)
2346 : {
2347 25 : if (!FlushFeatures())
2348 0 : return OGRERR_FAILURE;
2349 : }
2350 :
2351 3440 : return OGRERR_NONE;
2352 : }
2353 :
2354 : /************************************************************************/
2355 : /* FlushFeatures() */
2356 : /************************************************************************/
2357 :
2358 42 : inline bool OGRArrowWriterLayer::FlushFeatures()
2359 : {
2360 42 : if (m_apoBuilders.empty() || m_apoBuilders[0]->length() == 0)
2361 1 : return true;
2362 :
2363 41 : if (!IsFileWriterCreated())
2364 : {
2365 10 : CreateWriter();
2366 10 : if (!IsFileWriterCreated())
2367 0 : return false;
2368 : }
2369 :
2370 41 : return FlushGroup();
2371 : }
2372 :
2373 : /************************************************************************/
2374 : /* GetFeatureCount() */
2375 : /************************************************************************/
2376 :
2377 1 : inline GIntBig OGRArrowWriterLayer::GetFeatureCount(int bForce)
2378 : {
2379 1 : if (m_poAttrQuery == nullptr && m_poFilterGeom == nullptr)
2380 : {
2381 1 : return m_nFeatureCount;
2382 : }
2383 0 : return OGRLayer::GetFeatureCount(bForce);
2384 : }
2385 :
2386 : /************************************************************************/
2387 : /* TestCapability() */
2388 : /************************************************************************/
2389 :
2390 813 : inline int OGRArrowWriterLayer::TestCapability(const char *pszCap) const
2391 : {
2392 813 : if (EQUAL(pszCap, OLCCreateField) || EQUAL(pszCap, OLCCreateGeomField))
2393 27 : return m_poSchema == nullptr;
2394 :
2395 786 : if (EQUAL(pszCap, OLCSequentialWrite))
2396 24 : return true;
2397 :
2398 762 : if (EQUAL(pszCap, OLCFastWriteArrowBatch))
2399 0 : return true;
2400 :
2401 762 : if (EQUAL(pszCap, OLCStringsAsUTF8))
2402 1 : return true;
2403 :
2404 761 : if (EQUAL(pszCap, OLCMeasuredGeometries))
2405 315 : return true;
2406 :
2407 446 : return false;
2408 : }
2409 :
2410 : /************************************************************************/
2411 : /* WriteArrays() */
2412 : /************************************************************************/
2413 :
2414 355 : inline bool OGRArrowWriterLayer::WriteArrays(
2415 : std::function<bool(const std::shared_ptr<arrow::Field> &,
2416 : const std::shared_ptr<arrow::Array> &)>
2417 : postProcessArray)
2418 : {
2419 355 : int nArrowIdx = 0;
2420 355 : int nArrowIdxFirstField = !m_osFIDColumn.empty() ? 1 : 0;
2421 2233 : for (const auto &poBuilder : m_apoBuilders)
2422 : {
2423 1878 : const auto &field = m_poSchema->fields()[nArrowIdx];
2424 :
2425 0 : std::shared_ptr<arrow::Array> array;
2426 1878 : auto status = poBuilder->Finish(&array);
2427 1878 : if (!status.ok())
2428 : {
2429 0 : CPLError(CE_Failure, CPLE_AppDefined,
2430 : "builder::Finish() for field %s failed with %s",
2431 0 : field->name().c_str(), status.message().c_str());
2432 0 : return false;
2433 : }
2434 :
2435 : // CPLDebug("ARROW", "%s", array->ToString().c_str());
2436 :
2437 1878 : const int iCol = nArrowIdx - nArrowIdxFirstField;
2438 1878 : if (iCol >= 0 && iCol < m_poFeatureDefn->GetFieldCount())
2439 : {
2440 1482 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(iCol);
2441 1482 : const auto eFieldType = poFieldDefn->GetType();
2442 1482 : if (eFieldType == OFTInteger || eFieldType == OFTInteger64)
2443 : {
2444 261 : const auto &osDomainName = poFieldDefn->GetDomainName();
2445 : const auto oIter =
2446 261 : m_oMapFieldDomainToStringArray.find(osDomainName);
2447 261 : if (oIter != m_oMapFieldDomainToStringArray.end())
2448 : {
2449 : auto result = arrow::DictionaryArray::FromArrays(
2450 12 : field->type(), array, oIter->second);
2451 12 : if (!result.ok())
2452 : {
2453 0 : CPLError(CE_Failure, CPLE_AppDefined,
2454 : "DictionaryArray::FromArrays() for field %s "
2455 : "failed with %s",
2456 0 : field->name().c_str(),
2457 0 : result.status().message().c_str());
2458 0 : return false;
2459 : }
2460 12 : array = *result;
2461 : }
2462 : }
2463 : }
2464 :
2465 1878 : if (!postProcessArray(field, array))
2466 : {
2467 0 : return false;
2468 : }
2469 :
2470 1878 : nArrowIdx++;
2471 : }
2472 :
2473 355 : if (m_bWriteBBoxStruct)
2474 : {
2475 263 : const int nGeomFieldCount = m_poFeatureDefn->GetGeomFieldCount();
2476 522 : for (int i = 0; i < nGeomFieldCount; ++i)
2477 : {
2478 259 : const auto &field = m_apoFieldsBBOX[i];
2479 0 : std::shared_ptr<arrow::Array> array;
2480 259 : auto status = m_apoBuildersBBOXStruct[i]->Finish(&array);
2481 259 : if (!status.ok())
2482 : {
2483 0 : CPLError(CE_Failure, CPLE_AppDefined,
2484 : "builder::Finish() for field %s failed with %s",
2485 0 : field->name().c_str(), status.message().c_str());
2486 0 : return false;
2487 : }
2488 :
2489 259 : if (!postProcessArray(field, array))
2490 : {
2491 0 : return false;
2492 : }
2493 : }
2494 : }
2495 :
2496 355 : return true;
2497 : }
2498 :
2499 : /************************************************************************/
2500 : /* TestBit() */
2501 : /************************************************************************/
2502 :
/** Return whether bit nIdx is set in a bitmap.
 *
 * Bits are numbered from the least significant bit of each byte: bit nIdx
 * is bit (nIdx % 8) of byte (nIdx / 8).
 *
 * @param pabyData Bitmap buffer. Must hold at least nIdx / 8 + 1 bytes.
 * @param nIdx     Zero-based bit index.
 */
static inline bool TestBit(const uint8_t *pabyData, size_t nIdx)
{
    const size_t nByteOffset = nIdx >> 3;
    const unsigned nBitInByte = static_cast<unsigned>(nIdx & 7);
    return ((pabyData[nByteOffset] >> nBitInByte) & 1) != 0;
}
2507 :
2508 : /************************************************************************/
2509 : /* WriteArrowBatchInternal() */
2510 : /************************************************************************/
2511 :
2512 137 : inline bool OGRArrowWriterLayer::WriteArrowBatchInternal(
2513 : const struct ArrowSchema *schema, struct ArrowArray *array,
2514 : CSLConstList papszOptions,
2515 : std::function<bool(const std::shared_ptr<arrow::RecordBatch> &)> writeBatch)
2516 : {
2517 : #ifdef __COVERITY__
2518 : (void)schema;
2519 : (void)array;
2520 : (void)papszOptions;
2521 : (void)writeBatch;
2522 : CPLError(CE_Failure, CPLE_AppDefined, "Not implemented");
2523 : return false;
2524 : #else
2525 137 : if (m_poSchema == nullptr)
2526 : {
2527 131 : CreateSchema();
2528 : }
2529 :
2530 137 : if (!IsFileWriterCreated())
2531 : {
2532 131 : CreateWriter();
2533 131 : if (!IsFileWriterCreated())
2534 0 : return false;
2535 : }
2536 :
2537 137 : if (m_apoBuilders.empty())
2538 : {
2539 131 : CreateArrayBuilders();
2540 : }
2541 :
2542 137 : const int nGeomFieldCount = m_poFeatureDefn->GetGeomFieldCount();
2543 137 : const int nGeomFieldCountBBoxFields =
2544 137 : m_bWriteBBoxStruct ? nGeomFieldCount : 0;
2545 :
2546 137 : const char *pszFIDName = CSLFetchNameValueDef(
2547 : papszOptions, "FID", OGRLayer::DEFAULT_ARROW_FID_NAME);
2548 : const char *pszSingleGeomFieldName =
2549 137 : CSLFetchNameValue(papszOptions, "GEOMETRY_NAME");
2550 :
2551 : // Sort schema and array children in the same order as m_poSchema.
2552 : // This is needed for non-WKB geometry encoding
2553 274 : std::map<std::string, int> oMapSchemaChildrenNameToIdx;
2554 1760 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
2555 : {
2556 1623 : if (cpl::contains(oMapSchemaChildrenNameToIdx,
2557 1623 : schema->children[i]->name))
2558 : {
2559 0 : CPLError(CE_Failure, CPLE_AppDefined,
2560 : "Several fields with same name '%s' found",
2561 0 : schema->children[i]->name);
2562 0 : return false;
2563 : }
2564 1623 : oMapSchemaChildrenNameToIdx[schema->children[i]->name] = i;
2565 :
2566 1623 : if (!pszSingleGeomFieldName && schema->children[i]->metadata)
2567 : {
2568 : const auto oMetadata =
2569 284 : OGRParseArrowMetadata(schema->children[i]->metadata);
2570 142 : const auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
2571 301 : if (oIter != oMetadata.end() &&
2572 159 : (oIter->second == EXTENSION_NAME_OGC_WKB ||
2573 19 : oIter->second == EXTENSION_NAME_GEOARROW_WKB))
2574 : {
2575 129 : pszSingleGeomFieldName = schema->children[i]->name;
2576 : }
2577 : }
2578 : }
2579 137 : if (!pszSingleGeomFieldName)
2580 8 : pszSingleGeomFieldName = OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME;
2581 :
2582 137 : std::vector<int> anMapLayerSchemaToArraySchema(m_poSchema->num_fields(),
2583 411 : -1);
2584 : struct ArrowArray fidArray;
2585 : struct ArrowSchema fidSchema;
2586 137 : memset(&fidArray, 0, sizeof(fidArray));
2587 137 : memset(&fidSchema, 0, sizeof(fidSchema));
2588 274 : std::vector<void *> apBuffersFid;
2589 274 : std::vector<int64_t> fids;
2590 :
2591 274 : std::set<int> oSetReferencedFieldsInArraySchema;
2592 0 : const auto DummyFreeArray = [](struct ArrowArray *ptrArray)
2593 0 : { ptrArray->release = nullptr; };
2594 137 : const auto DummyFreeSchema = [](struct ArrowSchema *ptrSchema)
2595 137 : { ptrSchema->release = nullptr; };
2596 137 : bool bRebuildBatch = false;
2597 1758 : for (int i = 0; i < m_poSchema->num_fields() - nGeomFieldCountBBoxFields;
2598 : ++i)
2599 : {
2600 : auto oIter =
2601 1621 : oMapSchemaChildrenNameToIdx.find(m_poSchema->field(i)->name());
2602 1621 : if (oIter == oMapSchemaChildrenNameToIdx.end())
2603 : {
2604 7 : if (m_poSchema->field(i)->name() == m_osFIDColumn)
2605 : {
2606 1 : oIter = oMapSchemaChildrenNameToIdx.find(pszFIDName);
2607 1 : if (oIter == oMapSchemaChildrenNameToIdx.end())
2608 : {
2609 : // If the input data does not contain a FID column, but
2610 : // the output file requires it, creates a default FID column
2611 0 : fidArray.release = DummyFreeArray;
2612 0 : fidArray.n_buffers = 2;
2613 0 : apBuffersFid.resize(2);
2614 0 : fidArray.buffers =
2615 0 : const_cast<const void **>(apBuffersFid.data());
2616 0 : fids.reserve(static_cast<size_t>(array->length));
2617 0 : for (size_t iRow = 0;
2618 0 : iRow < static_cast<size_t>(array->length); ++iRow)
2619 0 : fids.push_back(m_nFeatureCount + iRow);
2620 0 : fidArray.buffers[1] = fids.data();
2621 0 : fidArray.length = array->length;
2622 0 : fidSchema.release = DummyFreeSchema;
2623 0 : fidSchema.name = m_osFIDColumn.c_str();
2624 0 : fidSchema.format = "l"; // int64
2625 0 : continue;
2626 : }
2627 : }
2628 12 : else if (nGeomFieldCount == 1 &&
2629 6 : m_poFeatureDefn->GetGeomFieldIndex(
2630 6 : m_poSchema->field(i)->name().c_str()) == 0)
2631 : {
2632 : oIter =
2633 6 : oMapSchemaChildrenNameToIdx.find(pszSingleGeomFieldName);
2634 6 : if (oIter != oMapSchemaChildrenNameToIdx.end())
2635 6 : bRebuildBatch = true;
2636 : }
2637 :
2638 7 : if (oIter == oMapSchemaChildrenNameToIdx.end())
2639 : {
2640 0 : CPLError(CE_Failure, CPLE_AppDefined,
2641 : "Cannot find field '%s' in schema",
2642 0 : m_poSchema->field(i)->name().c_str());
2643 0 : return false;
2644 : }
2645 : }
2646 1621 : anMapLayerSchemaToArraySchema[i] = oIter->second;
2647 1621 : oSetReferencedFieldsInArraySchema.insert(oIter->second);
2648 : }
2649 :
2650 : // Note: we cheat a bit by declaring a single instance of the minx/miny/
2651 : // maxx/maxy sub-field ArrowSchema*, and make all struct ArrowSchema point
2652 : // to them. That's OK because we use DummyFreeSchema to release, which does
2653 : // nothing.
2654 : struct ArrowSchema bboxStructSchemaXMin;
2655 : struct ArrowSchema bboxStructSchemaYMin;
2656 : struct ArrowSchema bboxStructSchemaXMax;
2657 : struct ArrowSchema bboxStructSchemaYMax;
2658 137 : constexpr int BBOX_SUBFIELD_COUNT = 4;
2659 : std::array<struct ArrowSchema *, BBOX_SUBFIELD_COUNT>
2660 : bboxStructSchemaChildren;
2661 137 : constexpr int BBOX_STRUCT_BUFFER_COUNT = 1; // validity bitmap array
2662 : // cppcheck-suppress constStatement
2663 : std::vector<std::array<const void *, BBOX_STRUCT_BUFFER_COUNT>>
2664 274 : bboxStructBuffersPtr;
2665 274 : std::vector<std::vector<GByte>> aabyBboxStructValidity;
2666 274 : std::vector<std::vector<float>> aadfMinX, aadfMinY, aadfMaxX, aadfMaxY;
2667 : // cppcheck-suppress constStatement
2668 274 : std::vector<std::array<struct ArrowArray, BBOX_SUBFIELD_COUNT>> bboxArrays;
2669 : // cppcheck-suppress constStatement
2670 : std::vector<std::array<struct ArrowArray *, BBOX_SUBFIELD_COUNT>>
2671 274 : bboxArraysPtr;
2672 137 : constexpr int BBOX_SUBFIELD_BUFFER_COUNT =
2673 : 2; // validity bitmap array and float array
2674 : std::vector<std::array<std::array<const void *, BBOX_SUBFIELD_BUFFER_COUNT>,
2675 : BBOX_SUBFIELD_COUNT>>
2676 274 : bboxBuffersPtr;
2677 :
2678 : // Temporary arrays to hold the geometry bounding boxes.
2679 274 : std::vector<struct ArrowArray> bboxStructArray;
2680 274 : std::vector<struct ArrowSchema> bboxStructSchema;
2681 :
2682 274 : std::vector<struct ArrowSchema *> newSchemaChildren;
2683 274 : std::vector<struct ArrowArray *> newArrayChildren;
2684 137 : newSchemaChildren.reserve(m_poSchema->num_fields());
2685 137 : newArrayChildren.reserve(m_poSchema->num_fields());
2686 1758 : for (int i = 0; i < m_poSchema->num_fields() - nGeomFieldCountBBoxFields;
2687 : ++i)
2688 : {
2689 1621 : if (anMapLayerSchemaToArraySchema[i] < 0)
2690 : {
2691 0 : CPLAssert(m_poSchema->field(i)->name() == m_osFIDColumn);
2692 0 : newSchemaChildren.emplace_back(&fidSchema);
2693 0 : newArrayChildren.emplace_back(&fidArray);
2694 : }
2695 : else
2696 : {
2697 : newSchemaChildren.emplace_back(
2698 1621 : schema->children[anMapLayerSchemaToArraySchema[i]]);
2699 : newArrayChildren.emplace_back(
2700 1621 : array->children[anMapLayerSchemaToArraySchema[i]]);
2701 : }
2702 : }
2703 :
2704 137 : if (m_bWriteBBoxStruct)
2705 : {
2706 25 : memset(&bboxStructSchemaXMin, 0, sizeof(bboxStructSchemaXMin));
2707 25 : memset(&bboxStructSchemaYMin, 0, sizeof(bboxStructSchemaYMin));
2708 25 : memset(&bboxStructSchemaXMax, 0, sizeof(bboxStructSchemaXMax));
2709 25 : memset(&bboxStructSchemaYMax, 0, sizeof(bboxStructSchemaYMax));
2710 :
2711 25 : bboxStructSchemaXMin.release = DummyFreeSchema;
2712 25 : bboxStructSchemaXMin.name = "xmin";
2713 25 : bboxStructSchemaXMin.format = "f"; // float32
2714 :
2715 25 : bboxStructSchemaYMin.release = DummyFreeSchema;
2716 25 : bboxStructSchemaYMin.name = "ymin";
2717 25 : bboxStructSchemaYMin.format = "f"; // float32
2718 :
2719 25 : bboxStructSchemaXMax.release = DummyFreeSchema;
2720 25 : bboxStructSchemaXMax.name = "xmax";
2721 25 : bboxStructSchemaXMax.format = "f"; // float32
2722 :
2723 25 : bboxStructSchemaYMax.release = DummyFreeSchema;
2724 25 : bboxStructSchemaYMax.name = "ymax";
2725 25 : bboxStructSchemaYMax.format = "f"; // float32
2726 :
2727 : try
2728 : {
2729 25 : constexpr int XMIN_IDX = 0;
2730 25 : constexpr int YMIN_IDX = 1;
2731 25 : constexpr int XMAX_IDX = 2;
2732 25 : constexpr int YMAX_IDX = 3;
2733 25 : bboxStructSchemaChildren[XMIN_IDX] = &bboxStructSchemaXMin;
2734 : // cppcheck-suppress objectIndex
2735 25 : bboxStructSchemaChildren[YMIN_IDX] = &bboxStructSchemaYMin;
2736 : // cppcheck-suppress objectIndex
2737 25 : bboxStructSchemaChildren[XMAX_IDX] = &bboxStructSchemaXMax;
2738 : // cppcheck-suppress objectIndex
2739 25 : bboxStructSchemaChildren[YMAX_IDX] = &bboxStructSchemaYMax;
2740 :
2741 25 : bboxStructArray.resize(nGeomFieldCount);
2742 25 : bboxStructSchema.resize(nGeomFieldCount);
2743 25 : bboxArrays.resize(nGeomFieldCount);
2744 25 : bboxArraysPtr.resize(nGeomFieldCount);
2745 25 : bboxBuffersPtr.resize(nGeomFieldCount);
2746 25 : bboxStructBuffersPtr.resize(nGeomFieldCount);
2747 25 : aabyBboxStructValidity.resize(nGeomFieldCount);
2748 50 : memset(bboxStructArray.data(), 0,
2749 25 : nGeomFieldCount * sizeof(bboxStructArray[0]));
2750 50 : memset(bboxStructSchema.data(), 0,
2751 25 : nGeomFieldCount * sizeof(bboxStructSchema[0]));
2752 50 : memset(bboxArrays.data(), 0,
2753 25 : nGeomFieldCount * sizeof(bboxArrays[0]));
2754 25 : aadfMinX.resize(nGeomFieldCount);
2755 25 : aadfMinY.resize(nGeomFieldCount);
2756 25 : aadfMaxX.resize(nGeomFieldCount);
2757 25 : aadfMaxY.resize(nGeomFieldCount);
2758 50 : for (int i = 0; i < nGeomFieldCount; ++i)
2759 : {
2760 25 : const bool bIsNullable = CPL_TO_BOOL(
2761 25 : m_poFeatureDefn->GetGeomFieldDefn(i)->IsNullable());
2762 25 : aadfMinX[i].reserve(static_cast<size_t>(array->length));
2763 25 : aadfMinY[i].reserve(static_cast<size_t>(array->length));
2764 25 : aadfMaxX[i].reserve(static_cast<size_t>(array->length));
2765 25 : aadfMaxY[i].reserve(static_cast<size_t>(array->length));
2766 25 : aabyBboxStructValidity[i].resize(
2767 25 : static_cast<size_t>(array->length + 7) / 8, 0xFF);
2768 :
2769 25 : bboxStructSchema[i].release = DummyFreeSchema;
2770 25 : bboxStructSchema[i].name = m_apoFieldsBBOX[i]->name().c_str();
2771 25 : bboxStructSchema[i].format = "+s"; // structure
2772 25 : bboxStructSchema[i].flags =
2773 25 : bIsNullable ? ARROW_FLAG_NULLABLE : 0;
2774 25 : bboxStructSchema[i].n_children = BBOX_SUBFIELD_COUNT;
2775 25 : bboxStructSchema[i].children = bboxStructSchemaChildren.data();
2776 :
2777 25 : constexpr int VALIDITY_ARRAY_IDX = 0;
2778 25 : constexpr int BBOX_SUBFIELD_FLOAT_VALUE_IDX = 1;
2779 25 : bboxBuffersPtr[i][XMIN_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2780 25 : aadfMinX[i].data();
2781 25 : bboxBuffersPtr[i][YMIN_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2782 25 : aadfMinY[i].data();
2783 25 : bboxBuffersPtr[i][XMAX_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2784 25 : aadfMaxX[i].data();
2785 25 : bboxBuffersPtr[i][YMAX_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2786 25 : aadfMaxY[i].data();
2787 :
2788 125 : for (int j = 0; j < BBOX_SUBFIELD_COUNT; ++j)
2789 : {
2790 100 : bboxBuffersPtr[i][j][VALIDITY_ARRAY_IDX] = nullptr;
2791 :
2792 100 : bboxArrays[i][j].release = DummyFreeArray;
2793 100 : bboxArrays[i][j].length = array->length;
2794 100 : bboxArrays[i][j].n_buffers = BBOX_SUBFIELD_BUFFER_COUNT;
2795 100 : bboxArrays[i][j].buffers = bboxBuffersPtr[i][j].data();
2796 :
2797 100 : bboxArraysPtr[i][j] = &bboxArrays[i][j];
2798 : }
2799 :
2800 25 : bboxStructArray[i].release = DummyFreeArray;
2801 25 : bboxStructArray[i].n_children = BBOX_SUBFIELD_COUNT;
2802 : // coverity[escape]
2803 25 : bboxStructArray[i].children = bboxArraysPtr[i].data();
2804 25 : bboxStructArray[i].length = array->length;
2805 25 : bboxStructArray[i].n_buffers = BBOX_STRUCT_BUFFER_COUNT;
2806 25 : bboxStructBuffersPtr[i][VALIDITY_ARRAY_IDX] =
2807 25 : bIsNullable ? aabyBboxStructValidity[i].data() : nullptr;
2808 : // coverity[escape]
2809 25 : bboxStructArray[i].buffers = bboxStructBuffersPtr[i].data();
2810 :
2811 25 : newSchemaChildren.emplace_back(&bboxStructSchema[i]);
2812 25 : newArrayChildren.emplace_back(&bboxStructArray[i]);
2813 : }
2814 : }
2815 0 : catch (const std::bad_alloc &)
2816 : {
2817 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
2818 : "Out of memory in "
2819 : "OGRArrowWriterLayer::WriteArrowBatchInternal()");
2820 0 : return false;
2821 : }
2822 : }
2823 :
2824 1760 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
2825 : {
2826 1623 : if (!cpl::contains(oSetReferencedFieldsInArraySchema, i))
2827 : {
2828 4 : if (m_osFIDColumn.empty() &&
2829 2 : strcmp(schema->children[i]->name, pszFIDName) == 0)
2830 : {
2831 : // If the input data contains a FID column, but the output data
2832 : // does not, then ignore it.
2833 : }
2834 : else
2835 : {
2836 0 : CPLError(CE_Failure, CPLE_AppDefined,
2837 : "Found field '%s' in array schema that does not exist "
2838 : "in layer schema",
2839 0 : schema->children[i]->name);
2840 0 : return false;
2841 : }
2842 : }
2843 : }
2844 :
2845 : // ImportSchema() would release the schema, but we don't want that.
2846 : // So copy the structure content into a local variable, and override its
2847 : // release callback to a no-op. This may be a bit fragile, but it doesn't
2848 : // look like ImportSchema implementation tries to access the C ArrowSchema
2849 : // after it has been called.
2850 137 : struct ArrowSchema lSchema = *schema;
2851 137 : schema = &lSchema;
2852 137 : CPL_IGNORE_RET_VAL(schema);
2853 :
2854 137 : lSchema.n_children = newSchemaChildren.size();
2855 137 : lSchema.children = newSchemaChildren.data();
2856 :
2857 137 : lSchema.release = DummyFreeSchema;
2858 274 : auto poSchemaResult = arrow::ImportSchema(&lSchema);
2859 137 : CPLAssert(lSchema.release == nullptr);
2860 137 : if (!poSchemaResult.ok())
2861 : {
2862 0 : CPLError(CE_Failure, CPLE_AppDefined, "ImportSchema() failed with %s",
2863 0 : poSchemaResult.status().message().c_str());
2864 0 : return false;
2865 : }
2866 274 : auto poSchema = *poSchemaResult;
2867 :
2868 : // Hack the array to use the new children we've computed above
2869 : // but make sure the original release() callback sees the original children
// ArrayReleaser takes over the release callback of an ArrowArray. It keeps
// a byte-for-byte snapshot of the array structure as it was when the helper
// was constructed, and puts that snapshot back before forwarding to the
// release callback that was originally installed.
2870 : struct ArrayReleaser
2871 : {
// Snapshot of the ArrowArray, filled in by the constructor (the empty
// braces value-initialize it until then).
2872 : struct ArrowArray ori_array
2873 : {
2874 : };
2875 :
// Saves the current content of *array into ori_array, then redirects
// array->release to ArrayReleaser::release() and array->private_data to
// this object, so that release() can find the snapshot again.
2876 137 : explicit ArrayReleaser(struct ArrowArray *array)
2877 137 : {
2878 137 : memcpy(&ori_array, array, sizeof(*array));
2879 137 : array->release = ArrayReleaser::release;
2880 137 : array->private_data = this;
2881 137 : }
2882 :
// Release callback installed by the constructor. array->private_data is
// expected to point to the ArrayReleaser that installed this callback.
2883 137 : static void release(struct ArrowArray *array)
2884 : {
2885 137 : struct ArrayReleaser *releaser =
2886 : static_cast<struct ArrayReleaser *>(array->private_data);
// Restore the snapshot first, so that the original callback sees the
// fields (release, private_data, children, ...) exactly as they were
// when the snapshot was taken, not the values substituted afterwards.
2887 137 : memcpy(array, &(releaser->ori_array), sizeof(*array));
2888 137 : CPLAssert(array->release != nullptr);
// Forward to the original release callback.
2889 137 : array->release(array);
// The original callback is expected to have flagged the array as
// released by resetting its release pointer to nullptr.
2890 137 : CPLAssert(array->release == nullptr);
// The helper destroys itself here, so instances must have been
// created with new.
2891 137 : delete releaser;
2892 137 : }
2893 : };
2894 :
2895 : // Must be allocated on the heap, since ArrayReleaser::release() will be
2896 : // called after this method has ended.
2897 137 : ArrayReleaser *releaser = new ArrayReleaser(array);
2898 137 : array->private_data = releaser;
2899 137 : array->n_children = newArrayChildren.size();
2900 : // cppcheck-suppress autoVariables
2901 137 : array->children = newArrayChildren.data();
2902 :
2903 : // Process geometry columns:
2904 : // - if the output encoding is WKB, then just note the geometry type and
2905 : // envelope.
2906 : // - otherwise convert to the output encoding.
2907 137 : int nBuilderIdx = 0;
2908 137 : if (!m_osFIDColumn.empty())
2909 : {
2910 2 : nBuilderIdx++;
2911 : }
2912 : std::map<std::string, std::shared_ptr<arrow::Array>>
2913 274 : oMapGeomFieldNameToArray;
2914 274 : for (int i = 0; i < nGeomFieldCount; ++i, ++nBuilderIdx)
2915 : {
2916 : const char *pszThisGeomFieldName =
2917 137 : m_poFeatureDefn->GetGeomFieldDefn(i)->GetNameRef();
2918 137 : int nIdx = poSchema->GetFieldIndex(pszThisGeomFieldName);
2919 137 : if (nIdx < 0)
2920 : {
2921 6 : if (nGeomFieldCount == 1)
2922 6 : nIdx = poSchema->GetFieldIndex(pszSingleGeomFieldName);
2923 6 : if (nIdx < 0)
2924 : {
2925 0 : CPLError(CE_Failure, CPLE_AppDefined,
2926 : "Cannot find geometry field '%s' in schema",
2927 : pszThisGeomFieldName);
2928 0 : return false;
2929 : }
2930 : }
2931 :
2932 137 : if (strcmp(lSchema.children[nIdx]->format, "z") != 0 &&
2933 1 : strcmp(lSchema.children[nIdx]->format, "Z") != 0)
2934 : {
2935 0 : CPLError(CE_Failure, CPLE_AppDefined,
2936 : "Type of geometry field '%s' is not binary, but '%s'",
2937 0 : pszThisGeomFieldName, lSchema.children[nIdx]->format);
2938 0 : return false;
2939 : }
2940 :
2941 137 : const auto psGeomArray = array->children[nIdx];
2942 137 : const uint8_t *pabyValidity =
2943 137 : psGeomArray->null_count != 0
2944 137 : ? static_cast<const uint8_t *>(psGeomArray->buffers[0])
2945 : : nullptr;
2946 137 : const bool bUseOffsets32 =
2947 137 : (strcmp(lSchema.children[nIdx]->format, "z") == 0);
2948 137 : const uint32_t *panOffsets32 =
2949 137 : static_cast<const uint32_t *>(psGeomArray->buffers[1]) +
2950 137 : psGeomArray->offset;
2951 137 : const uint64_t *panOffsets64 =
2952 137 : static_cast<const uint64_t *>(psGeomArray->buffers[1]) +
2953 137 : psGeomArray->offset;
2954 137 : GByte *pabyData =
2955 137 : static_cast<GByte *>(const_cast<void *>(psGeomArray->buffers[2]));
2956 137 : OGREnvelope sEnvelope;
2957 137 : auto poBuilder = m_apoBuilders[nBuilderIdx].get();
2958 :
2959 712 : for (size_t iRow = 0; iRow < static_cast<size_t>(psGeomArray->length);
2960 : ++iRow)
2961 : {
2962 575 : bool bValidGeom = false;
2963 :
2964 1091 : if (!pabyValidity ||
2965 516 : TestBit(pabyValidity,
2966 516 : static_cast<size_t>(iRow + psGeomArray->offset)))
2967 : {
2968 447 : const auto nLen =
2969 447 : bUseOffsets32 ? static_cast<size_t>(panOffsets32[iRow + 1] -
2970 437 : panOffsets32[iRow])
2971 10 : : static_cast<size_t>(panOffsets64[iRow + 1] -
2972 10 : panOffsets64[iRow]);
2973 447 : GByte *pabyWkb =
2974 447 : pabyData + (bUseOffsets32
2975 437 : ? panOffsets32[iRow]
2976 10 : : static_cast<size_t>(panOffsets64[iRow]));
2977 447 : if (m_aeGeomEncoding[i] == OGRArrowGeomEncoding::WKB)
2978 : {
2979 179 : FixupWKBGeometryBeforeWriting(pabyWkb, nLen);
2980 :
2981 179 : uint32_t nType = 0;
2982 179 : bool bNeedSwap = false;
2983 179 : if (OGRWKBGetGeomType(pabyWkb, nLen, bNeedSwap, nType))
2984 : {
2985 179 : m_oSetWrittenGeometryTypes[i].insert(
2986 179 : static_cast<OGRwkbGeometryType>(nType));
2987 179 : if (OGRWKBGetBoundingBox(pabyWkb, nLen, sEnvelope))
2988 : {
2989 179 : bValidGeom = true;
2990 179 : m_aoEnvelopes[i].Merge(sEnvelope);
2991 :
2992 179 : if (m_bWriteBBoxStruct)
2993 : {
2994 51 : aadfMinX[i].push_back(
2995 51 : castToFloatDown(sEnvelope.MinX));
2996 51 : aadfMinY[i].push_back(
2997 51 : castToFloatDown(sEnvelope.MinY));
2998 51 : aadfMaxX[i].push_back(
2999 51 : castToFloatUp(sEnvelope.MaxX));
3000 51 : aadfMaxY[i].push_back(
3001 51 : castToFloatUp(sEnvelope.MaxY));
3002 : }
3003 : }
3004 : }
3005 : }
3006 : else
3007 : {
3008 268 : size_t nBytesConsumedOut = 0;
3009 268 : OGRGeometry *poGeometry = nullptr;
3010 268 : OGRGeometryFactory::createFromWkb(
3011 : pabyWkb, nullptr, &poGeometry, nLen, wkbVariantIso,
3012 : nBytesConsumedOut);
3013 268 : if (BuildGeometry(poGeometry, i, poBuilder) != OGRERR_NONE)
3014 : {
3015 0 : delete poGeometry;
3016 0 : return false;
3017 : }
3018 268 : bValidGeom = true;
3019 268 : if (m_bWriteBBoxStruct)
3020 : {
3021 0 : poGeometry->getEnvelope(&sEnvelope);
3022 0 : aadfMinX[i].push_back(castToFloatDown(sEnvelope.MinX));
3023 0 : aadfMinY[i].push_back(castToFloatDown(sEnvelope.MinY));
3024 0 : aadfMaxX[i].push_back(castToFloatUp(sEnvelope.MaxX));
3025 0 : aadfMaxY[i].push_back(castToFloatUp(sEnvelope.MaxY));
3026 : }
3027 268 : delete poGeometry;
3028 : }
3029 : }
3030 : else
3031 : {
3032 128 : if (m_aeGeomEncoding[i] != OGRArrowGeomEncoding::WKB)
3033 : {
3034 81 : if (BuildGeometry(nullptr, i, poBuilder) != OGRERR_NONE)
3035 0 : return false;
3036 : }
3037 : }
3038 :
3039 575 : if (!bValidGeom && m_bWriteBBoxStruct)
3040 : {
3041 18 : if ((bboxStructSchema[i].flags & ARROW_FLAG_NULLABLE))
3042 : {
3043 18 : bboxStructArray[i].null_count++;
3044 18 : aabyBboxStructValidity[i][iRow / 8] &=
3045 18 : ~(1 << static_cast<int>(iRow % 8));
3046 : }
3047 18 : aadfMinX[i].push_back(0.0f);
3048 18 : aadfMinY[i].push_back(0.0f);
3049 18 : aadfMaxX[i].push_back(0.0f);
3050 18 : aadfMaxY[i].push_back(0.0f);
3051 : }
3052 : }
3053 :
3054 137 : if (m_aeGeomEncoding[i] != OGRArrowGeomEncoding::WKB)
3055 : {
3056 0 : std::shared_ptr<arrow::Array> geomArray;
3057 81 : auto status = poBuilder->Finish(&geomArray);
3058 81 : if (!status.ok())
3059 : {
3060 0 : CPLError(CE_Failure, CPLE_AppDefined,
3061 : "builder::Finish() for field %s failed with %s",
3062 0 : pszThisGeomFieldName, status.message().c_str());
3063 0 : return false;
3064 : }
3065 162 : oMapGeomFieldNameToArray[pszThisGeomFieldName] =
3066 162 : std::move(geomArray);
3067 : }
3068 : }
3069 :
3070 : auto poRecordBatchResult =
3071 274 : arrow::ImportRecordBatch(array, std::move(poSchema));
3072 137 : if (!poRecordBatchResult.ok())
3073 : {
3074 0 : CPLError(CE_Failure, CPLE_AppDefined,
3075 : "ImportRecordBatch() failed with %s",
3076 0 : poRecordBatchResult.status().message().c_str());
3077 0 : return false;
3078 : }
3079 274 : auto poRecordBatch = *poRecordBatchResult;
3080 :
3081 137 : if (!(bRebuildBatch || !oMapGeomFieldNameToArray.empty()))
3082 : {
3083 1106 : for (int i = 0; i < m_poSchema->num_fields(); ++i)
3084 : {
3085 : const auto oIter =
3086 1056 : oMapGeomFieldNameToArray.find(m_poSchema->field(i)->name());
3087 1056 : auto l_array = (oIter != oMapGeomFieldNameToArray.end())
3088 0 : ? oIter->second
3089 2112 : : poRecordBatch->column(i);
3090 2112 : const auto schemaType = m_poSchema->field(i)->type();
3091 2112 : const auto arrayType = l_array->type();
3092 2104 : if (schemaType->id() != arrow::Type::EXTENSION &&
3093 1048 : arrayType->id() == arrow::Type::EXTENSION)
3094 : {
3095 0 : bRebuildBatch = true;
3096 : }
3097 1056 : else if (schemaType->id() != arrayType->id())
3098 : {
3099 3 : CPLDebug(
3100 : "Arrow",
3101 : "Field idx=%d name='%s', schema type=%s, array type=%s", i,
3102 1 : m_poSchema->field(i)->name().c_str(),
3103 2 : schemaType->ToString().c_str(),
3104 2 : arrayType->ToString().c_str());
3105 : }
3106 : }
3107 : }
3108 :
3109 : // below assertion commented out since it is not strictly necessary, but
3110 : // reflects what ImportRecordBatch() does.
3111 : // CPLAssert(array->release == nullptr);
3112 :
3113 : // We may need to reconstruct a final record batch that perfectly matches
3114 : // the expected schema.
3115 137 : if (bRebuildBatch || !oMapGeomFieldNameToArray.empty())
3116 : {
3117 87 : std::vector<std::shared_ptr<arrow::Array>> apoArrays;
3118 677 : for (int i = 0; i < m_poSchema->num_fields(); ++i)
3119 : {
3120 : const auto oIter =
3121 590 : oMapGeomFieldNameToArray.find(m_poSchema->field(i)->name());
3122 590 : if (oIter != oMapGeomFieldNameToArray.end())
3123 81 : apoArrays.emplace_back(oIter->second);
3124 : else
3125 509 : apoArrays.emplace_back(poRecordBatch->column(i));
3126 :
3127 590 : auto expectedFieldType = m_poSchema->field(i)->type();
3128 590 : if (expectedFieldType->id() == arrow::Type::EXTENSION)
3129 : {
3130 0 : auto extensionType = cpl::down_cast<arrow::ExtensionType *>(
3131 : expectedFieldType.get());
3132 0 : expectedFieldType = extensionType->storage_type();
3133 : }
3134 :
3135 590 : if (apoArrays.back()->type()->id() == arrow::Type::EXTENSION)
3136 : {
3137 0 : apoArrays.back() =
3138 0 : std::static_pointer_cast<arrow::ExtensionArray>(
3139 0 : apoArrays.back())
3140 0 : ->storage();
3141 : }
3142 :
3143 590 : if (apoArrays.back()->type()->id() != expectedFieldType->id())
3144 : {
3145 0 : CPLError(
3146 : CE_Failure, CPLE_AppDefined,
3147 : "Field '%s' of unexpected type. Got '%s', expected '%s'",
3148 0 : m_poSchema->field(i)->name().c_str(),
3149 0 : apoArrays.back()->type()->name().c_str(),
3150 0 : expectedFieldType->name().c_str());
3151 0 : return false;
3152 : }
3153 : }
3154 348 : poRecordBatchResult = arrow::RecordBatch::Make(
3155 261 : m_poSchema, poRecordBatch->num_rows(), std::move(apoArrays));
3156 87 : if (!poRecordBatchResult.ok())
3157 : {
3158 0 : CPLError(CE_Failure, CPLE_AppDefined,
3159 : "RecordBatch::Make() failed with %s",
3160 0 : poRecordBatchResult.status().message().c_str());
3161 0 : return false;
3162 : }
3163 87 : poRecordBatch = *poRecordBatchResult;
3164 : }
3165 :
3166 137 : if (writeBatch(poRecordBatch))
3167 : {
3168 137 : m_nFeatureCount += poRecordBatch->num_rows();
3169 137 : return true;
3170 : }
3171 0 : return false;
3172 : #endif
3173 : }
3174 :
3175 : #endif /* OGARROWWRITERLAYER_HPP_INCLUDED */
|