Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: Arrow generic code
4 : * Purpose: Arrow generic code
5 : * Author: Even Rouault, <even.rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022, Planet Labs
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #ifndef OGARROWWRITERLAYER_HPP_INCLUDED
14 : #define OGARROWWRITERLAYER_HPP_INCLUDED
15 :
16 : #include "ogr_arrow.h"
17 :
18 : #include "cpl_json.h"
19 : #include "cpl_time.h"
20 :
21 : #include "ogrlayerarrow.h"
22 : #include "ogr_wkb.h"
23 :
24 : #include <array>
25 : #include <cinttypes>
26 : #include <limits>
27 :
/** Fill value used when sizing m_anTZFlag: marks a field for which no
 * timezone flag has been recorded yet. */
static constexpr int TZFLAG_UNINITIALIZED = -1;

/** Checks an Arrow status-like expression (anything exposing ok()).
 * When it is not OK, a CPLError() naming the expression (through
 * ARROW_STRINGIFY) is emitted and the enclosing function returns ret_value.
 * The expression is evaluated only once, in the ok() test. */
#define OGR_ARROW_RETURN_NOT_OK(status, ret_value)                             \
    do                                                                         \
    {                                                                          \
        if ((status).ok())                                                     \
            break;                                                             \
        CPLError(CE_Failure, CPLE_AppDefined, "%s failed",                     \
                 ARROW_STRINGIFY(status));                                     \
        return (ret_value);                                                    \
    } while (false)

/** Variant of OGR_ARROW_RETURN_NOT_OK() for functions returning bool. */
#define OGR_ARROW_RETURN_FALSE_NOT_OK(status)                                  \
    OGR_ARROW_RETURN_NOT_OK(status, false)

/** Variant of OGR_ARROW_RETURN_NOT_OK() for functions returning OGRErr. */
#define OGR_ARROW_RETURN_OGRERR_NOT_OK(status)                                 \
    OGR_ARROW_RETURN_NOT_OK(status, OGRERR_FAILURE)

/** Makes the enclosing function return OGRERR_FAILURE as soon as ret_value
 * is anything other than OGRERR_NONE. */
#define OGR_ARROW_PROPAGATE_OGRERR(ret_value)                                  \
    do                                                                         \
    {                                                                          \
        if ((ret_value) == OGRERR_NONE)                                        \
            break;                                                             \
        return OGRERR_FAILURE;                                                 \
    } while (false)
53 :
54 : /************************************************************************/
55 : /* OGRArrowWriterLayer() */
56 : /************************************************************************/
57 :
58 415 : inline OGRArrowWriterLayer::OGRArrowWriterLayer(
59 : arrow::MemoryPool *poMemoryPool,
60 : const std::shared_ptr<arrow::io::OutputStream> &poOutputStream,
61 415 : const char *pszLayerName)
62 415 : : m_poMemoryPool(poMemoryPool), m_poOutputStream(poOutputStream)
63 : {
64 415 : m_poFeatureDefn = new OGRFeatureDefn(pszLayerName);
65 415 : m_poFeatureDefn->SetGeomType(wkbNone);
66 415 : m_poFeatureDefn->Reference();
67 415 : SetDescription(pszLayerName);
68 415 : }
69 :
70 : /************************************************************************/
71 : /* ~OGRArrowWriterLayer() */
72 : /************************************************************************/
73 :
74 415 : inline OGRArrowWriterLayer::~OGRArrowWriterLayer()
75 : {
76 415 : CPLDebug("ARROW", "Memory pool (writer layer): bytes_allocated = %" PRId64,
77 415 : m_poMemoryPool->bytes_allocated());
78 415 : CPLDebug("ARROW", "Memory pool (writer layer): max_memory = %" PRId64,
79 415 : m_poMemoryPool->max_memory());
80 :
81 415 : m_poFeatureDefn->Release();
82 415 : }
83 :
84 : /************************************************************************/
85 : /* FinalizeWriting() */
86 : /************************************************************************/
87 :
88 403 : inline bool OGRArrowWriterLayer::FinalizeWriting()
89 : {
90 403 : bool ret = true;
91 :
92 403 : if (!IsFileWriterCreated())
93 : {
94 274 : CreateWriter();
95 : }
96 403 : if (IsFileWriterCreated())
97 : {
98 403 : PerformStepsBeforeFinalFlushGroup();
99 :
100 403 : if (!m_apoBuilders.empty() && m_apoFieldsFromArrowSchema.empty())
101 228 : ret = FlushGroup();
102 :
103 403 : if (!CloseFileWriter())
104 0 : ret = false;
105 : }
106 :
107 403 : return ret;
108 : }
109 :
110 : /************************************************************************/
111 : /* CreateSchemaCommon() */
112 : /************************************************************************/
113 :
114 403 : inline void OGRArrowWriterLayer::CreateSchemaCommon()
115 : {
116 403 : CPLAssert(static_cast<int>(m_aeGeomEncoding.size()) ==
117 : m_poFeatureDefn->GetGeomFieldCount());
118 :
119 806 : std::vector<std::shared_ptr<arrow::Field>> fields;
120 403 : bool bNeedGDALSchema = false;
121 :
122 403 : m_anTZFlag.resize(m_poFeatureDefn->GetFieldCount(), TZFLAG_UNINITIALIZED);
123 :
124 403 : if (!m_osFIDColumn.empty())
125 : {
126 18 : bNeedGDALSchema = true;
127 18 : fields.emplace_back(arrow::field(m_osFIDColumn, arrow::int64(), false));
128 : }
129 :
130 403 : if (!m_apoFieldsFromArrowSchema.empty())
131 : {
132 119 : fields.insert(fields.end(), m_apoFieldsFromArrowSchema.begin(),
133 238 : m_apoFieldsFromArrowSchema.end());
134 : }
135 :
136 1031 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i)
137 : {
138 628 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
139 628 : std::shared_ptr<arrow::DataType> dt;
140 628 : const auto eDT = poFieldDefn->GetType();
141 628 : const auto eSubDT = poFieldDefn->GetSubType();
142 628 : const auto &osDomainName = poFieldDefn->GetDomainName();
143 628 : const OGRFieldDomain *poFieldDomain = nullptr;
144 628 : const int nWidth = poFieldDefn->GetWidth();
145 628 : if (!osDomainName.empty())
146 : {
147 4 : const auto oIter = m_oMapFieldDomains.find(osDomainName);
148 4 : if (oIter == m_oMapFieldDomains.end())
149 : {
150 0 : CPLError(CE_Warning, CPLE_AppDefined,
151 : "Field %s references domain %s, but the later one "
152 : "has not been created",
153 : poFieldDefn->GetNameRef(), osDomainName.c_str());
154 : }
155 : else
156 : {
157 4 : poFieldDomain = oIter->second.get();
158 : }
159 : }
160 628 : switch (eDT)
161 : {
162 65 : case OFTInteger:
163 65 : if (eSubDT == OFSTBoolean)
164 4 : dt = arrow::boolean();
165 61 : else if (eSubDT == OFSTInt16)
166 4 : dt = arrow::int16();
167 : else
168 57 : dt = arrow::int32();
169 65 : if (poFieldDomain != nullptr)
170 : {
171 4 : dt = arrow::dictionary(dt, arrow::utf8());
172 : }
173 65 : break;
174 :
175 26 : case OFTInteger64:
176 26 : dt = arrow::int64();
177 26 : if (poFieldDomain != nullptr)
178 : {
179 0 : dt = arrow::dictionary(dt, arrow::utf8());
180 : }
181 26 : break;
182 :
183 52 : case OFTReal:
184 : {
185 52 : const int nPrecision = poFieldDefn->GetPrecision();
186 52 : if (nWidth != 0 && nPrecision != 0)
187 : {
188 : // Since arrow 18.0, we could use arrow::smallest_decimal()
189 : // to return the smallest representation (i.e. possibly
190 : // decimal32 and decimal64). But for now keep decimal128
191 : // as the minimum for backwards compatibility.
192 : // GetValueDecimal() and other functions in
193 : // ogrlayerarrow.cpp would have to be adapted for decimal32
194 : // and decimal64 compatibility.
195 8 : if (nWidth > 38)
196 0 : dt = arrow::decimal256(nWidth, nPrecision);
197 : else
198 8 : dt = arrow::decimal128(nWidth, nPrecision);
199 : }
200 44 : else if (eSubDT == OFSTFloat32)
201 7 : dt = arrow::float32();
202 : else
203 37 : dt = arrow::float64();
204 52 : break;
205 : }
206 :
207 270 : case OFTString:
208 : case OFTWideString:
209 270 : if ((eSubDT != OFSTNone && eSubDT != OFSTJSON) || nWidth > 0)
210 0 : bNeedGDALSchema = true;
211 270 : dt = arrow::utf8();
212 270 : break;
213 :
214 14 : case OFTBinary:
215 14 : if (nWidth != 0)
216 4 : dt = arrow::fixed_size_binary(nWidth);
217 : else
218 10 : dt = arrow::binary();
219 14 : break;
220 :
221 48 : case OFTIntegerList:
222 48 : if (eSubDT == OFSTBoolean)
223 8 : dt = arrow::list(arrow::boolean());
224 40 : else if (eSubDT == OFSTInt16)
225 0 : dt = arrow::list(arrow::int16());
226 : else
227 40 : dt = arrow::list(arrow::int32());
228 48 : break;
229 :
230 20 : case OFTInteger64List:
231 20 : dt = arrow::list(arrow::int64());
232 20 : break;
233 :
234 35 : case OFTRealList:
235 35 : if (eSubDT == OFSTFloat32)
236 11 : dt = arrow::list(arrow::float32());
237 : else
238 24 : dt = arrow::list(arrow::float64());
239 35 : break;
240 :
241 12 : case OFTStringList:
242 : case OFTWideStringList:
243 12 : dt = arrow::list(arrow::utf8());
244 12 : break;
245 :
246 31 : case OFTDate:
247 31 : dt = arrow::date32();
248 31 : break;
249 :
250 8 : case OFTTime:
251 8 : dt = arrow::time32(arrow::TimeUnit::MILLI);
252 8 : break;
253 :
254 47 : case OFTDateTime:
255 : {
256 47 : const int nTZFlag = poFieldDefn->GetTZFlag();
257 47 : if (nTZFlag >= OGR_TZFLAG_MIXED_TZ)
258 : {
259 12 : m_anTZFlag[i] = nTZFlag;
260 : }
261 47 : dt = arrow::timestamp(arrow::TimeUnit::MILLI);
262 47 : break;
263 : }
264 : }
265 :
266 628 : auto field = arrow::field(poFieldDefn->GetNameRef(), std::move(dt),
267 1884 : poFieldDefn->IsNullable());
268 628 : if (eDT == OFTString && eSubDT == OFSTJSON)
269 : {
270 83 : auto kvMetadata = std::make_shared<arrow::KeyValueMetadata>();
271 83 : kvMetadata->Append(ARROW_EXTENSION_NAME_KEY,
272 : EXTENSION_NAME_ARROW_JSON);
273 83 : field = field->WithMetadata(kvMetadata);
274 : }
275 :
276 628 : fields.emplace_back(std::move(field));
277 628 : if (poFieldDefn->GetAlternativeNameRef()[0])
278 2 : bNeedGDALSchema = true;
279 628 : if (!poFieldDefn->GetComment().empty())
280 3 : bNeedGDALSchema = true;
281 : }
282 :
283 805 : for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i)
284 : {
285 402 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
286 402 : const auto eGType = poGeomFieldDefn->GetType();
287 : const int nDim =
288 402 : 2 + (OGR_GT_HasZ(eGType) ? 1 : 0) + (OGR_GT_HasM(eGType) ? 1 : 0);
289 :
290 402 : const bool pointFieldNullable = GetDriverUCName() == "PARQUET";
291 :
292 : // Fixed Size List GeoArrow encoding
293 : const auto getFixedSizeListOfPoint =
294 228 : [nDim, eGType, pointFieldNullable]()
295 : {
296 : return arrow::fixed_size_list(
297 146 : arrow::field(nDim == 2 ? "xy"
298 30 : : nDim == 3 ? (OGR_GT_HasZ(eGType) ? "xyz" : "xym")
299 : : "xyzm",
300 : arrow::float64(), pointFieldNullable),
301 116 : nDim);
302 402 : };
303 :
304 : // Struct GeoArrow encoding
305 1206 : auto xField(arrow::field("x", arrow::float64(), false));
306 1206 : auto yField(arrow::field("y", arrow::float64(), false));
307 : std::vector<std::shared_ptr<arrow::Field>> pointFields{
308 : arrow::field("x", arrow::float64(), false),
309 2412 : arrow::field("y", arrow::float64(), false)};
310 402 : if (OGR_GT_HasZ(eGType))
311 : pointFields.emplace_back(
312 121 : arrow::field("z", arrow::float64(), false));
313 402 : if (OGR_GT_HasM(eGType))
314 : pointFields.emplace_back(
315 52 : arrow::field("m", arrow::float64(), false));
316 804 : auto pointStructType(arrow::struct_(std::move(pointFields)));
317 :
318 40 : const auto getListOfVertices = [&getFixedSizeListOfPoint]()
319 : {
320 80 : return arrow::list(std::make_shared<arrow::Field>(
321 120 : "vertices", getFixedSizeListOfPoint()));
322 402 : };
323 :
324 22 : const auto getListOfRings = [&getListOfVertices]()
325 : {
326 : return arrow::list(
327 44 : std::make_shared<arrow::Field>("rings", getListOfVertices()));
328 402 : };
329 :
330 104 : const auto getListOfVerticesStruct = [&pointStructType]()
331 : {
332 : return arrow::list(
333 208 : std::make_shared<arrow::Field>("vertices", pointStructType));
334 402 : };
335 :
336 60 : const auto getListOfRingsStruct = [&getListOfVerticesStruct]()
337 : {
338 120 : return arrow::list(std::make_shared<arrow::Field>(
339 180 : "rings", getListOfVerticesStruct()));
340 402 : };
341 :
342 402 : std::shared_ptr<arrow::DataType> dt;
343 402 : switch (m_aeGeomEncoding[i])
344 : {
345 134 : case OGRArrowGeomEncoding::WKB:
346 134 : dt = arrow::binary();
347 134 : break;
348 :
349 53 : case OGRArrowGeomEncoding::WKT:
350 53 : dt = arrow::utf8();
351 53 : break;
352 :
353 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
354 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
355 0 : CPLAssert(false);
356 : break;
357 :
358 9 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
359 9 : dt = getFixedSizeListOfPoint();
360 9 : break;
361 :
362 9 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
363 9 : dt = getListOfVertices();
364 9 : break;
365 :
366 11 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
367 11 : dt = getListOfRings();
368 11 : break;
369 :
370 9 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
371 18 : dt = arrow::list(std::make_shared<arrow::Field>(
372 27 : "points", getFixedSizeListOfPoint()));
373 9 : break;
374 :
375 9 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
376 18 : dt = arrow::list(std::make_shared<arrow::Field>(
377 27 : "linestrings", getListOfVertices()));
378 9 : break;
379 :
380 11 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
381 22 : dt = arrow::list(std::make_shared<arrow::Field>(
382 33 : "polygons", getListOfRings()));
383 11 : break;
384 :
385 31 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
386 31 : dt = pointStructType;
387 31 : break;
388 :
389 22 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
390 22 : dt = getListOfVerticesStruct();
391 22 : break;
392 :
393 30 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
394 30 : dt = getListOfRingsStruct();
395 30 : break;
396 :
397 22 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
398 44 : dt = arrow::list(
399 66 : std::make_shared<arrow::Field>("points", pointStructType));
400 22 : break;
401 :
402 22 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
403 44 : dt = arrow::list(std::make_shared<arrow::Field>(
404 66 : "linestrings", getListOfVerticesStruct()));
405 22 : break;
406 :
407 30 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
408 60 : dt = arrow::list(std::make_shared<arrow::Field>(
409 90 : "polygons", getListOfRingsStruct()));
410 30 : break;
411 : }
412 :
413 : std::shared_ptr<arrow::Field> field(
414 402 : arrow::field(poGeomFieldDefn->GetNameRef(), std::move(dt),
415 1206 : poGeomFieldDefn->IsNullable()));
416 402 : if (m_bWriteFieldArrowExtensionName)
417 : {
418 136 : auto kvMetadata = field->metadata()
419 136 : ? field->metadata()->Copy()
420 136 : : std::make_shared<arrow::KeyValueMetadata>();
421 272 : kvMetadata->Append(
422 : ARROW_EXTENSION_NAME_KEY,
423 136 : GetGeomEncodingAsString(m_aeGeomEncoding[i], false));
424 136 : field = field->WithMetadata(kvMetadata);
425 : }
426 :
427 402 : m_apoBaseStructGeomType.emplace_back(std::move(pointStructType));
428 :
429 402 : fields.emplace_back(std::move(field));
430 : }
431 :
432 403 : if (m_bWriteBBoxStruct)
433 : {
434 389 : for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i)
435 : {
436 197 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
437 591 : auto bbox_field_xmin(arrow::field("xmin", arrow::float32(), false));
438 591 : auto bbox_field_ymin(arrow::field("ymin", arrow::float32(), false));
439 591 : auto bbox_field_xmax(arrow::field("xmax", arrow::float32(), false));
440 591 : auto bbox_field_ymax(arrow::field("ymax", arrow::float32(), false));
441 : auto bbox_field(arrow::field(
442 : CPLGetConfigOption("OGR_PARQUET_COVERING_BBOX_NAME",
443 394 : std::string(poGeomFieldDefn->GetNameRef())
444 197 : .append("_bbox")
445 : .c_str()),
446 1182 : arrow::struct_(
447 197 : {std::move(bbox_field_xmin), std::move(bbox_field_ymin),
448 1182 : std::move(bbox_field_xmax), std::move(bbox_field_ymax)}),
449 1182 : poGeomFieldDefn->IsNullable()));
450 197 : fields.emplace_back(bbox_field);
451 197 : m_apoFieldsBBOX.emplace_back(bbox_field);
452 : }
453 : }
454 :
455 403 : m_aoEnvelopes.resize(m_poFeatureDefn->GetGeomFieldCount());
456 403 : m_oSetWrittenGeometryTypes.resize(m_poFeatureDefn->GetGeomFieldCount());
457 :
458 403 : m_poSchema = arrow::schema(std::move(fields));
459 403 : CPLAssert(m_poSchema);
460 424 : if (bNeedGDALSchema &&
461 21 : CPLTestBool(CPLGetConfigOption(
462 424 : ("OGR_" + GetDriverUCName() + "_WRITE_GDAL_SCHEMA").c_str(),
463 : "YES")))
464 : {
465 42 : CPLJSONObject oRoot;
466 42 : CPLJSONObject oColumns;
467 :
468 21 : if (!m_osFIDColumn.empty())
469 18 : oRoot.Add("fid", m_osFIDColumn);
470 :
471 21 : oRoot.Add("columns", oColumns);
472 202 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i)
473 : {
474 181 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
475 362 : CPLJSONObject oColumn;
476 181 : oColumns.Add(poFieldDefn->GetNameRef(), oColumn);
477 181 : oColumn.Add("type", OGR_GetFieldTypeName(poFieldDefn->GetType()));
478 181 : const auto eSubDT = poFieldDefn->GetSubType();
479 181 : if (eSubDT != OFSTNone)
480 56 : oColumn.Add("subtype", OGR_GetFieldSubTypeName(eSubDT));
481 181 : const int nWidth = poFieldDefn->GetWidth();
482 181 : if (nWidth > 0)
483 6 : oColumn.Add("width", nWidth);
484 181 : const int nPrecision = poFieldDefn->GetPrecision();
485 181 : if (nPrecision > 0)
486 4 : oColumn.Add("precision", nPrecision);
487 181 : if (poFieldDefn->GetAlternativeNameRef()[0])
488 2 : oColumn.Add("alternative_name",
489 : poFieldDefn->GetAlternativeNameRef());
490 181 : if (!poFieldDefn->GetComment().empty())
491 3 : oColumn.Add("comment", poFieldDefn->GetComment());
492 : }
493 :
494 21 : auto kvMetadata = m_poSchema->metadata()
495 0 : ? m_poSchema->metadata()->Copy()
496 42 : : std::make_shared<arrow::KeyValueMetadata>();
497 42 : kvMetadata->Append("gdal:schema",
498 42 : oRoot.Format(CPLJSONObject::PrettyFormat::Plain));
499 21 : m_poSchema = m_poSchema->WithMetadata(kvMetadata);
500 21 : CPLAssert(m_poSchema);
501 : }
502 403 : }
503 :
504 : /************************************************************************/
505 : /* FinalizeSchema() */
506 : /************************************************************************/
507 :
508 348 : inline void OGRArrowWriterLayer::FinalizeSchema()
509 : {
510 : // Final tuning of schema taking into actual timezone values
511 : // from features
512 348 : int nArrowIdxFirstField = !m_osFIDColumn.empty() ? 1 : 0;
513 974 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i)
514 : {
515 626 : if (m_anTZFlag[i] >= OGR_TZFLAG_MIXED_TZ)
516 : {
517 12 : const int nOffset = m_anTZFlag[i] == OGR_TZFLAG_UTC
518 12 : ? 0
519 8 : : (m_anTZFlag[i] - OGR_TZFLAG_UTC) * 15;
520 12 : int nHours = static_cast<int>(nOffset / 60); // Round towards zero.
521 12 : const int nMinutes = std::abs(nOffset - nHours * 60);
522 :
523 : const std::string osTZ =
524 : CPLSPrintf("%c%02d:%02d", nOffset >= 0 ? '+' : '-',
525 24 : std::abs(nHours), nMinutes);
526 24 : auto dt = arrow::timestamp(arrow::TimeUnit::MILLI, osTZ);
527 12 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
528 12 : auto field = arrow::field(poFieldDefn->GetNameRef(), std::move(dt),
529 36 : poFieldDefn->IsNullable());
530 24 : auto result = m_poSchema->SetField(nArrowIdxFirstField + i, field);
531 12 : if (!result.ok())
532 : {
533 0 : CPLError(CE_Warning, CPLE_AppDefined,
534 : "Schema::SetField() failed with %s",
535 0 : result.status().message().c_str());
536 : }
537 : else
538 : {
539 12 : m_poSchema = *result;
540 : }
541 : }
542 : }
543 348 : }
544 :
545 : /************************************************************************/
546 : /* AddFieldDomain() */
547 : /************************************************************************/
548 :
549 : inline bool
550 11 : OGRArrowWriterLayer::AddFieldDomain(std::unique_ptr<OGRFieldDomain> &&domain,
551 : std::string &failureReason)
552 : {
553 11 : if (domain->GetDomainType() != OFDT_CODED)
554 : {
555 0 : failureReason = "Only coded field domains are supported by Arrow";
556 0 : return false;
557 : }
558 :
559 : const OGRCodedFieldDomain *poDomain =
560 11 : static_cast<const OGRCodedFieldDomain *>(domain.get());
561 11 : const OGRCodedValue *psIter = poDomain->GetEnumeration();
562 :
563 : auto poStringBuilder =
564 22 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool);
565 :
566 11 : int nLastCode = -1;
567 44 : for (; psIter->pszCode; ++psIter)
568 : {
569 33 : if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
570 : {
571 0 : failureReason = "Non integer code in domain ";
572 0 : failureReason += domain->GetName();
573 0 : return false;
574 : }
575 33 : int nCode = atoi(psIter->pszCode);
576 33 : if (nCode <= nLastCode || nCode - nLastCode > 100)
577 : {
578 0 : failureReason = "Too sparse codes in domain ";
579 0 : failureReason += domain->GetName();
580 0 : return false;
581 : }
582 33 : for (int i = nLastCode + 1; i < nCode; ++i)
583 : {
584 0 : OGR_ARROW_RETURN_FALSE_NOT_OK(poStringBuilder->AppendNull());
585 : }
586 33 : if (psIter->pszValue)
587 33 : OGR_ARROW_RETURN_FALSE_NOT_OK(
588 : poStringBuilder->Append(psIter->pszValue));
589 : else
590 0 : OGR_ARROW_RETURN_FALSE_NOT_OK(poStringBuilder->AppendNull());
591 33 : nLastCode = nCode;
592 : }
593 :
594 11 : std::shared_ptr<arrow::Array> stringArray;
595 22 : auto status = poStringBuilder->Finish(&stringArray);
596 11 : if (!status.ok())
597 : {
598 0 : CPLError(CE_Failure, CPLE_AppDefined,
599 : "StringArray::Finish() failed with %s",
600 0 : status.message().c_str());
601 0 : return false;
602 : }
603 :
604 11 : m_oMapFieldDomainToStringArray[domain->GetName()] = std::move(stringArray);
605 11 : m_oMapFieldDomains[domain->GetName()] = std::move(domain);
606 11 : return true;
607 : }
608 :
609 : /************************************************************************/
610 : /* GetFieldDomainNames() */
611 : /************************************************************************/
612 :
613 0 : inline std::vector<std::string> OGRArrowWriterLayer::GetFieldDomainNames() const
614 : {
615 0 : std::vector<std::string> names;
616 0 : names.reserve(m_oMapFieldDomains.size());
617 0 : for (const auto &it : m_oMapFieldDomains)
618 : {
619 0 : names.emplace_back(it.first);
620 : }
621 0 : return names;
622 : }
623 :
624 : /************************************************************************/
625 : /* GetFieldDomain() */
626 : /************************************************************************/
627 :
628 : inline const OGRFieldDomain *
629 15 : OGRArrowWriterLayer::GetFieldDomain(const std::string &name) const
630 : {
631 15 : const auto iter = m_oMapFieldDomains.find(name);
632 15 : if (iter == m_oMapFieldDomains.end())
633 11 : return nullptr;
634 4 : return iter->second.get();
635 : }
636 :
637 : /************************************************************************/
638 : /* CreateField() */
639 : /************************************************************************/
640 :
641 629 : inline OGRErr OGRArrowWriterLayer::CreateField(const OGRFieldDefn *poField,
642 : int /* bApproxOK */)
643 : {
644 629 : if (m_poSchema)
645 : {
646 1 : CPLError(CE_Failure, CPLE_NotSupported,
647 : "Cannot add field after a first feature has been written");
648 1 : return OGRERR_FAILURE;
649 : }
650 628 : if (!m_apoFieldsFromArrowSchema.empty())
651 : {
652 0 : CPLError(CE_Failure, CPLE_NotSupported,
653 : "Cannot mix calls to CreateField() and "
654 : "CreateFieldFromArrowSchema()");
655 0 : return OGRERR_FAILURE;
656 : }
657 628 : m_poFeatureDefn->AddFieldDefn(poField);
658 628 : return OGRERR_NONE;
659 : }
660 :
661 : /************************************************************************/
662 : /* OGRLayer::CreateFieldFromArrowSchema() */
663 : /************************************************************************/
664 :
665 998 : inline bool OGRArrowWriterLayer::CreateFieldFromArrowSchema(
666 : const struct ArrowSchema *schema, CSLConstList /*papszOptions*/)
667 : {
668 998 : if (m_poSchema)
669 : {
670 0 : CPLError(CE_Failure, CPLE_NotSupported,
671 : "Cannot add field after a first feature has been written");
672 0 : return false;
673 : }
674 :
675 998 : if (m_poFeatureDefn->GetFieldCount())
676 : {
677 0 : CPLError(CE_Failure, CPLE_NotSupported,
678 : "Cannot mix calls to CreateField() and "
679 : "CreateFieldFromArrowSchema()");
680 0 : return false;
681 : }
682 :
683 998 : if (m_osFIDColumn == schema->name)
684 : {
685 0 : CPLError(CE_Failure, CPLE_AppDefined,
686 : "FID column has the same name as this field: %s",
687 0 : schema->name);
688 0 : return false;
689 : }
690 :
691 35951 : for (auto &apoField : m_apoFieldsFromArrowSchema)
692 : {
693 34953 : if (apoField->name() == schema->name)
694 : {
695 0 : CPLError(CE_Failure, CPLE_AppDefined,
696 0 : "Field of name %s already exists", schema->name);
697 0 : return false;
698 : }
699 : }
700 :
701 998 : if (m_poFeatureDefn->GetGeomFieldIndex(schema->name) >= 0)
702 : {
703 0 : CPLError(CE_Failure, CPLE_AppDefined,
704 0 : "Geometry field of name %s already exists", schema->name);
705 0 : return false;
706 : }
707 :
708 : // ImportField() would release the schema, but we don't want that
709 : // So copy the structure content into a local variable, and override its
710 : // release callback to a no-op. This may be a bit fragile, but it doesn't
711 : // look like ImportField implementation tries to access the C ArrowSchema
712 : // after it has been called.
713 998 : struct ArrowSchema lSchema = *schema;
714 998 : const auto DummyFreeSchema = [](struct ArrowSchema *ptrSchema)
715 998 : { ptrSchema->release = nullptr; };
716 998 : lSchema.release = DummyFreeSchema;
717 1996 : auto result = arrow::ImportField(&lSchema);
718 998 : CPLAssert(lSchema.release == nullptr);
719 998 : if (!result.ok())
720 : {
721 0 : CPLError(CE_Failure, CPLE_AppDefined,
722 : "CreateFieldFromArrowSchema() failed");
723 0 : return false;
724 : }
725 998 : m_apoFieldsFromArrowSchema.emplace_back(std::move(*result));
726 998 : return true;
727 : }
728 :
729 : /************************************************************************/
730 : /* GetPreciseArrowGeomEncoding() */
731 : /************************************************************************/
732 :
733 217 : inline OGRArrowGeomEncoding OGRArrowWriterLayer::GetPreciseArrowGeomEncoding(
734 : OGRArrowGeomEncoding eEncodingType, OGRwkbGeometryType eGType)
735 : {
736 217 : CPLAssert(eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC ||
737 : eEncodingType == OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC);
738 217 : const auto eFlatType = wkbFlatten(eGType);
739 217 : if (eFlatType == wkbPoint)
740 : {
741 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
742 40 : ? OGRArrowGeomEncoding::GEOARROW_FSL_POINT
743 40 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT;
744 : }
745 177 : else if (eFlatType == wkbLineString)
746 : {
747 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
748 31 : ? OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING
749 31 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING;
750 : }
751 146 : else if (eFlatType == wkbPolygon)
752 : {
753 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
754 41 : ? OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON
755 41 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON;
756 : }
757 105 : else if (eFlatType == wkbMultiPoint)
758 : {
759 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
760 31 : ? OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT
761 31 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT;
762 : }
763 74 : else if (eFlatType == wkbMultiLineString)
764 : {
765 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
766 31 : ? OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING
767 31 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING;
768 : }
769 43 : else if (eFlatType == wkbMultiPolygon)
770 : {
771 : return eEncodingType == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC
772 41 : ? OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON
773 41 : : OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON;
774 : }
775 : else
776 : {
777 2 : CPLError(CE_Failure, CPLE_NotSupported,
778 : "GeoArrow encoding is currently not supported for %s",
779 : OGRGeometryTypeToName(eGType));
780 2 : return eEncodingType;
781 : }
782 : }
783 :
784 : /************************************************************************/
785 : /* GetGeomEncodingAsString() */
786 : /************************************************************************/
787 :
788 : inline const char *
789 650 : OGRArrowWriterLayer::GetGeomEncodingAsString(OGRArrowGeomEncoding eGeomEncoding,
790 : bool bForParquetGeo)
791 : {
792 650 : switch (eGeomEncoding)
793 : {
794 181 : case OGRArrowGeomEncoding::WKB:
795 181 : return bForParquetGeo ? "WKB" : "geoarrow.wkb";
796 111 : case OGRArrowGeomEncoding::WKT:
797 111 : return bForParquetGeo ? "WKT" : "geoarrow.wkt";
798 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
799 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
800 0 : CPLAssert(false);
801 : break;
802 19 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
803 19 : return "geoarrow.point";
804 19 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
805 19 : return "geoarrow.linestring";
806 21 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
807 21 : return "geoarrow.polygon";
808 19 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
809 19 : return "geoarrow.multipoint";
810 19 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
811 19 : return "geoarrow.multilinestring";
812 21 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
813 21 : return "geoarrow.multipolygon";
814 54 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
815 54 : return bForParquetGeo ? "point" : "geoarrow.point";
816 34 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
817 34 : return bForParquetGeo ? "linestring" : "geoarrow.linestring";
818 42 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
819 42 : return bForParquetGeo ? "polygon" : "geoarrow.polygon";
820 34 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
821 34 : return bForParquetGeo ? "multipoint" : "geoarrow.multipoint";
822 34 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
823 34 : return bForParquetGeo ? "multilinestring"
824 34 : : "geoarrow.multilinestring";
825 42 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
826 42 : return bForParquetGeo ? "multipolygon" : "geoarrow.multipolygon";
827 : }
828 0 : return nullptr;
829 : }
830 :
831 : /************************************************************************/
832 : /* CreateGeomField() */
833 : /************************************************************************/
834 :
835 : inline OGRErr
836 27 : OGRArrowWriterLayer::CreateGeomField(const OGRGeomFieldDefn *poField,
837 : int /* bApproxOK */)
838 : {
839 27 : if (m_poSchema)
840 : {
841 1 : CPLError(CE_Failure, CPLE_NotSupported,
842 : "Cannot add field after a first feature has been written");
843 1 : return OGRERR_FAILURE;
844 : }
845 26 : const auto eGType = poField->GetType();
846 26 : if (!IsSupportedGeometryType(eGType))
847 : {
848 0 : return OGRERR_FAILURE;
849 : }
850 :
851 26 : if (IsSRSRequired() && poField->GetSpatialRef() == nullptr)
852 : {
853 0 : CPLError(CE_Warning, CPLE_AppDefined,
854 : "Geometry column should have an associated CRS");
855 : }
856 26 : auto eGeomEncoding = m_eGeomEncoding;
857 26 : if (eGeomEncoding == OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC ||
858 26 : eGeomEncoding == OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC)
859 : {
860 0 : const auto eEncodingType = eGeomEncoding;
861 0 : eGeomEncoding = GetPreciseArrowGeomEncoding(eEncodingType, eGType);
862 0 : if (eGeomEncoding == eEncodingType)
863 0 : return OGRERR_FAILURE;
864 : }
865 26 : m_aeGeomEncoding.push_back(eGeomEncoding);
866 26 : m_poFeatureDefn->AddGeomFieldDefn(poField);
867 26 : return OGRERR_NONE;
868 : }
869 :
870 : /************************************************************************/
871 : /* MakeGeoArrowBuilder() */
872 : /************************************************************************/
873 :
874 : static std::shared_ptr<arrow::ArrayBuilder>
875 134 : MakeGeoArrowBuilder(arrow::MemoryPool *poMemoryPool, int nDim, int nDepth)
876 : {
877 134 : if (nDepth == 0)
878 104 : return std::make_shared<arrow::FixedSizeListBuilder>(
879 104 : poMemoryPool, std::make_shared<arrow::DoubleBuilder>(poMemoryPool),
880 52 : nDim);
881 : else
882 164 : return std::make_shared<arrow::ListBuilder>(
883 246 : poMemoryPool, MakeGeoArrowBuilder(poMemoryPool, nDim, nDepth - 1));
884 : }
885 :
886 : /************************************************************************/
887 : /* MakeGeoArrowStructBuilder() */
888 : /************************************************************************/
889 :
890 : static std::shared_ptr<arrow::ArrayBuilder>
891 384 : MakeGeoArrowStructBuilder(arrow::MemoryPool *poMemoryPool, int nDim, int nDepth,
892 : const std::shared_ptr<arrow::DataType> &eBaseType)
893 : {
894 384 : if (nDepth == 0)
895 : {
896 155 : std::vector<std::shared_ptr<arrow::ArrayBuilder>> builders;
897 537 : for (int i = 0; i < nDim; ++i)
898 : builders.emplace_back(
899 382 : std::make_shared<arrow::DoubleBuilder>(poMemoryPool));
900 310 : return std::make_shared<arrow::StructBuilder>(eBaseType, poMemoryPool,
901 310 : std::move(builders));
902 : }
903 : else
904 458 : return std::make_shared<arrow::ListBuilder>(
905 458 : poMemoryPool, MakeGeoArrowStructBuilder(poMemoryPool, nDim,
906 229 : nDepth - 1, eBaseType));
907 : }
908 :
909 : /************************************************************************/
910 : /* ClearArrayBuilers() */
911 : /************************************************************************/
912 :
913 266 : inline void OGRArrowWriterLayer::ClearArrayBuilers()
914 : {
915 266 : m_apoBuilders.clear();
916 266 : m_apoBuildersBBOXStruct.clear();
917 266 : m_apoBuildersBBOXXMin.clear();
918 266 : m_apoBuildersBBOXYMin.clear();
919 266 : m_apoBuildersBBOXXMax.clear();
920 266 : m_apoBuildersBBOXYMax.clear();
921 266 : }
922 :
923 : /************************************************************************/
924 : /* CreateArrayBuilders() */
925 : /************************************************************************/
926 :
927 385 : inline void OGRArrowWriterLayer::CreateArrayBuilders()
928 : {
929 385 : m_apoBuilders.reserve(1 + m_poFeatureDefn->GetFieldCount() +
930 385 : m_poFeatureDefn->GetGeomFieldCount());
931 :
932 385 : int nArrowIdx = 0;
933 385 : if (!m_osFIDColumn.empty())
934 : {
935 49 : m_apoBuilders.emplace_back(std::make_shared<arrow::Int64Builder>());
936 49 : nArrowIdx++;
937 : }
938 :
939 1727 : for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i, ++nArrowIdx)
940 : {
941 1342 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
942 1342 : std::shared_ptr<arrow::ArrayBuilder> builder;
943 1342 : const auto eSubDT = poFieldDefn->GetSubType();
944 1342 : switch (poFieldDefn->GetType())
945 : {
946 145 : case OFTInteger:
947 145 : if (eSubDT == OFSTBoolean)
948 : builder =
949 12 : std::make_shared<arrow::BooleanBuilder>(m_poMemoryPool);
950 133 : else if (eSubDT == OFSTInt16)
951 : builder =
952 12 : std::make_shared<arrow::Int16Builder>(m_poMemoryPool);
953 : else
954 : builder =
955 121 : std::make_shared<arrow::Int32Builder>(m_poMemoryPool);
956 145 : break;
957 :
958 74 : case OFTInteger64:
959 74 : builder = std::make_shared<arrow::Int64Builder>(m_poMemoryPool);
960 74 : break;
961 :
962 106 : case OFTReal:
963 : {
964 212 : const auto arrowType = m_poSchema->fields()[nArrowIdx]->type();
965 106 : if (arrowType->id() == arrow::Type::DECIMAL128)
966 24 : builder = std::make_shared<arrow::Decimal128Builder>(
967 24 : arrowType, m_poMemoryPool);
968 82 : else if (arrowType->id() == arrow::Type::DECIMAL256)
969 0 : builder = std::make_shared<arrow::Decimal256Builder>(
970 0 : arrowType, m_poMemoryPool);
971 82 : else if (eSubDT == OFSTFloat32)
972 : builder =
973 21 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool);
974 : else
975 : builder =
976 61 : std::make_shared<arrow::DoubleBuilder>(m_poMemoryPool);
977 106 : break;
978 : }
979 :
980 468 : case OFTString:
981 : case OFTWideString:
982 : builder =
983 468 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool);
984 468 : break;
985 :
986 38 : case OFTBinary:
987 38 : if (poFieldDefn->GetWidth() != 0)
988 24 : builder = std::make_shared<arrow::FixedSizeBinaryBuilder>(
989 24 : arrow::fixed_size_binary(poFieldDefn->GetWidth()),
990 24 : m_poMemoryPool);
991 : else
992 : builder =
993 26 : std::make_shared<arrow::BinaryBuilder>(m_poMemoryPool);
994 38 : break;
995 :
996 144 : case OFTIntegerList:
997 : {
998 144 : std::shared_ptr<arrow::ArrayBuilder> poBaseBuilder;
999 144 : if (eSubDT == OFSTBoolean)
1000 : poBaseBuilder =
1001 24 : std::make_shared<arrow::BooleanBuilder>(m_poMemoryPool);
1002 120 : else if (eSubDT == OFSTInt16)
1003 : poBaseBuilder =
1004 0 : std::make_shared<arrow::Int16Builder>(m_poMemoryPool);
1005 : else
1006 : poBaseBuilder =
1007 120 : std::make_shared<arrow::Int32Builder>(m_poMemoryPool);
1008 288 : builder = std::make_shared<arrow::ListBuilder>(m_poMemoryPool,
1009 144 : poBaseBuilder);
1010 144 : break;
1011 : }
1012 :
1013 60 : case OFTInteger64List:
1014 60 : builder = std::make_shared<arrow::ListBuilder>(
1015 60 : m_poMemoryPool,
1016 180 : std::make_shared<arrow::Int64Builder>(m_poMemoryPool));
1017 :
1018 60 : break;
1019 :
1020 105 : case OFTRealList:
1021 105 : if (eSubDT == OFSTFloat32)
1022 33 : builder = std::make_shared<arrow::ListBuilder>(
1023 33 : m_poMemoryPool,
1024 99 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1025 : else
1026 72 : builder = std::make_shared<arrow::ListBuilder>(
1027 72 : m_poMemoryPool,
1028 216 : std::make_shared<arrow::DoubleBuilder>(m_poMemoryPool));
1029 105 : break;
1030 :
1031 36 : case OFTStringList:
1032 : case OFTWideStringList:
1033 36 : builder = std::make_shared<arrow::ListBuilder>(
1034 36 : m_poMemoryPool,
1035 108 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool));
1036 :
1037 36 : break;
1038 :
1039 47 : case OFTDate:
1040 : builder =
1041 47 : std::make_shared<arrow::Date32Builder>(m_poMemoryPool);
1042 47 : break;
1043 :
1044 24 : case OFTTime:
1045 48 : builder = std::make_shared<arrow::Time32Builder>(
1046 72 : arrow::time32(arrow::TimeUnit::MILLI), m_poMemoryPool);
1047 24 : break;
1048 :
1049 95 : case OFTDateTime:
1050 190 : builder = std::make_shared<arrow::TimestampBuilder>(
1051 285 : arrow::timestamp(arrow::TimeUnit::MILLI), m_poMemoryPool);
1052 95 : break;
1053 : }
1054 1342 : m_apoBuilders.emplace_back(builder);
1055 : }
1056 :
1057 769 : for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i, ++nArrowIdx)
1058 : {
1059 384 : std::shared_ptr<arrow::ArrayBuilder> builder;
1060 384 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
1061 384 : const auto eGType = poGeomFieldDefn->GetType();
1062 : const int nDim =
1063 384 : 2 + (OGR_GT_HasZ(eGType) ? 1 : 0) + (OGR_GT_HasM(eGType) ? 1 : 0);
1064 :
1065 384 : switch (m_aeGeomEncoding[i])
1066 : {
1067 124 : case OGRArrowGeomEncoding::WKB:
1068 : builder =
1069 124 : std::make_shared<arrow::BinaryBuilder>(m_poMemoryPool);
1070 124 : break;
1071 :
1072 53 : case OGRArrowGeomEncoding::WKT:
1073 : builder =
1074 53 : std::make_shared<arrow::StringBuilder>(m_poMemoryPool);
1075 53 : break;
1076 :
1077 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
1078 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 0);
1079 8 : break;
1080 :
1081 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
1082 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 1);
1083 8 : break;
1084 :
1085 10 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
1086 10 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 2);
1087 10 : break;
1088 :
1089 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
1090 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 1);
1091 8 : break;
1092 :
1093 8 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
1094 8 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 2);
1095 8 : break;
1096 :
1097 10 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
1098 10 : builder = MakeGeoArrowBuilder(m_poMemoryPool, nDim, 3);
1099 10 : break;
1100 :
1101 34 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
1102 68 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 0,
1103 68 : m_apoBaseStructGeomType[i]);
1104 34 : break;
1105 :
1106 21 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
1107 42 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 1,
1108 42 : m_apoBaseStructGeomType[i]);
1109 21 : break;
1110 :
1111 29 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
1112 58 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 2,
1113 58 : m_apoBaseStructGeomType[i]);
1114 29 : break;
1115 :
1116 21 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
1117 42 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 1,
1118 42 : m_apoBaseStructGeomType[i]);
1119 21 : break;
1120 :
1121 21 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
1122 42 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 2,
1123 42 : m_apoBaseStructGeomType[i]);
1124 21 : break;
1125 :
1126 29 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
1127 58 : builder = MakeGeoArrowStructBuilder(m_poMemoryPool, nDim, 3,
1128 58 : m_apoBaseStructGeomType[i]);
1129 29 : break;
1130 :
1131 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
1132 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
1133 0 : CPLAssert(false);
1134 : break;
1135 : }
1136 :
1137 384 : m_apoBuilders.emplace_back(builder);
1138 :
1139 384 : if (m_bWriteBBoxStruct)
1140 : {
1141 : m_apoBuildersBBOXXMin.emplace_back(
1142 187 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1143 : m_apoBuildersBBOXYMin.emplace_back(
1144 187 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1145 : m_apoBuildersBBOXXMax.emplace_back(
1146 187 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1147 : m_apoBuildersBBOXYMax.emplace_back(
1148 187 : std::make_shared<arrow::FloatBuilder>(m_poMemoryPool));
1149 : m_apoBuildersBBOXStruct.emplace_back(
1150 374 : std::make_shared<arrow::StructBuilder>(
1151 187 : m_apoFieldsBBOX[i]->type(), m_poMemoryPool,
1152 1683 : std::vector<std::shared_ptr<arrow::ArrayBuilder>>{
1153 187 : m_apoBuildersBBOXXMin.back(),
1154 187 : m_apoBuildersBBOXYMin.back(),
1155 187 : m_apoBuildersBBOXXMax.back(),
1156 1309 : m_apoBuildersBBOXYMax.back()}));
1157 : }
1158 : }
1159 385 : }
1160 :
1161 : /************************************************************************/
1162 : /* castToFloatDown() */
1163 : /************************************************************************/
1164 :
1165 : // Cf https://github.com/sqlite/sqlite/blob/90e4a3b7fcdf63035d6f35eb44d11ff58ff4b068/ext/rtree/rtree.c#L2993C1-L2995C3
/*
** Rounding constants for double->float conversion.
** 8388608 is 2^23: scaling by (1 -/+ 2^-23) moves a value by about one unit
** in the last place of a normalized single precision float.
*/
#define RNDTOWARDS (1.0 - 1.0 / 8388608.0) /* Round towards zero */
#define RNDAWAY (1.0 + 1.0 / 8388608.0)    /* Round away from zero */

/** Convert a double to a float that is lower than or equal to it.
 *
 * The value is first converted with the default rounding. If that rounded
 * up, the input is nudged towards -infinity with the scaling constants
 * above and converted again. The scaling has no effect when the input lies in
 * the subnormal range of float, or when it is a finite value beyond the float
 * range (assumes the out-of-range conversion yields infinity, as on IEEE-754
 * platforms); those cases are handled by stepping to the previous float.
 * NaN is returned unchanged.
 *
 * @param d value to convert.
 * @return a float f such that f <= d.
 */
static float castToFloatDown(double d)
{
    float f = static_cast<float>(d);
    if (f > d)
    {
        f = static_cast<float>(d * (d < 0 ? RNDAWAY : RNDTOWARDS));
        if (f > d)
        {
            if (f > std::numeric_limits<float>::max())
            {
                // d is finite but above the float range
                f = std::numeric_limits<float>::max();
            }
            else
            {
                // Subnormal range: floats are evenly spaced by denorm_min()
                f -= std::numeric_limits<float>::denorm_min();
            }
        }
    }
    return f;
}

/** Convert a double to a float that is greater than or equal to it.
 *
 * Mirror of castToFloatDown(): the input is nudged towards +infinity when the
 * default conversion rounded down, with the same handling of the subnormal
 * and out-of-range cases. NaN is returned unchanged.
 *
 * @param d value to convert.
 * @return a float f such that f >= d.
 */
static float castToFloatUp(double d)
{
    float f = static_cast<float>(d);
    if (f < d)
    {
        f = static_cast<float>(d * (d < 0 ? RNDTOWARDS : RNDAWAY));
        if (f < d)
        {
            if (f < std::numeric_limits<float>::lowest())
            {
                // d is finite but below the float range
                f = std::numeric_limits<float>::lowest();
            }
            else
            {
                // Subnormal range: floats are evenly spaced by denorm_min()
                f += std::numeric_limits<float>::denorm_min();
            }
        }
    }
    return f;
}
1195 :
1196 : /************************************************************************/
1197 : /* GeoArrowLineBuilder() */
1198 : /************************************************************************/
1199 :
1200 : template <class PointBuilderType>
1201 516 : static OGRErr GeoArrowLineBuilder(const OGRLineString *poLS,
1202 : PointBuilderType *poPointBuilder,
1203 : arrow::DoubleBuilder *poXBuilder,
1204 : arrow::DoubleBuilder *poYBuilder,
1205 : arrow::DoubleBuilder *poZBuilder,
1206 : arrow::DoubleBuilder *poMBuilder)
1207 : {
1208 2360 : for (int j = 0; j < poLS->getNumPoints(); ++j)
1209 : {
1210 1844 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1211 1844 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poXBuilder->Append(poLS->getX(j)));
1212 1844 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poYBuilder->Append(poLS->getY(j)));
1213 1844 : if (poZBuilder)
1214 540 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poZBuilder->Append(poLS->getZ(j)));
1215 1844 : if (poMBuilder)
1216 220 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMBuilder->Append(poLS->getM(j)));
1217 : }
1218 516 : return OGRERR_NONE;
1219 : }
1220 :
1221 : /************************************************************************/
1222 : /* BuildGeometry() */
1223 : /************************************************************************/
1224 :
// Append one value for geometry field iGeomField to poBuilder, using the
// encoding stored in m_aeGeomEncoding[iGeomField].
//
// poGeom may be null, in which case a null (or, for a FixedSizeList point
// column written by the PARQUET driver, a point with NaN coordinates) is
// appended. poBuilder must be the builder created for that geometry field:
// it is downcast without check according to the encoding.
//
// Besides feeding poBuilder, non-empty geometries are merged into
// m_aoEnvelopes[iGeomField], their type is recorded in
// m_oSetWrittenGeometryTypes[iGeomField] and, when m_bWriteBBoxStruct is set,
// one row is appended to the bounding box builders of the field.
//
// Returns OGRERR_NONE on success, or OGRERR_FAILURE when an Arrow builder
// operation fails.
1225 3572 : inline OGRErr OGRArrowWriterLayer::BuildGeometry(OGRGeometry *poGeom,
1226 : int iGeomField,
1227 : arrow::ArrayBuilder *poBuilder)
1228 : {
1229 3572 : const auto eGType = poGeom ? poGeom->getGeometryType() : wkbNone;
1230 : const auto eColumnGType =
1231 3572 : m_poFeatureDefn->GetGeomFieldDefn(iGeomField)->GetType();
// Z/M presence is driven by the declared type of the column, not by the
// geometry being written.
1232 3572 : const bool bHasZ = CPL_TO_BOOL(OGR_GT_HasZ(eColumnGType));
1233 3572 : const bool bHasM = CPL_TO_BOOL(OGR_GT_HasM(eColumnGType));
1234 3572 : const bool bIsEmpty = poGeom != nullptr && poGeom->IsEmpty();
// Update the running extent of the field and the set of written geometry
// types. Null and empty geometries do not contribute.
1235 3572 : OGREnvelope3D oEnvelope;
1236 3572 : if (poGeom != nullptr && !bIsEmpty)
1237 : {
1238 2073 : if (poGeom->Is3D())
1239 : {
1240 266 : poGeom->getEnvelope(&oEnvelope);
1241 266 : m_aoEnvelopes[iGeomField].Merge(oEnvelope);
1242 : }
1243 : else
1244 : {
// 2D geometry: call the getEnvelope() overload taking an OGREnvelope, so
// that only the 2D part of oEnvelope is requested.
1245 1807 : poGeom->getEnvelope(static_cast<OGREnvelope *>(&oEnvelope));
1246 1807 : m_aoEnvelopes[iGeomField].Merge(oEnvelope);
1247 : }
1248 2073 : m_oSetWrittenGeometryTypes[iGeomField].insert(eGType);
1249 : }
1250 :
// Per-feature bounding box, stored as floats: minima are rounded down and
// maxima rounded up. Null and empty geometries get a null bounding box.
1251 3572 : if (m_bWriteBBoxStruct)
1252 : {
1253 2869 : if (poGeom && !bIsEmpty)
1254 : {
1255 1682 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1256 : m_apoBuildersBBOXXMin[iGeomField]->Append(
1257 : castToFloatDown(oEnvelope.MinX)));
1258 1682 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1259 : m_apoBuildersBBOXYMin[iGeomField]->Append(
1260 : castToFloatDown(oEnvelope.MinY)));
1261 1682 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1262 : m_apoBuildersBBOXXMax[iGeomField]->Append(
1263 : castToFloatUp(oEnvelope.MaxX)));
1264 1682 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1265 : m_apoBuildersBBOXYMax[iGeomField]->Append(
1266 : castToFloatUp(oEnvelope.MaxY)));
1267 1682 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1268 : m_apoBuildersBBOXStruct[iGeomField]->Append());
1269 : }
1270 : else
1271 : {
1272 1187 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1273 : m_apoBuildersBBOXStruct[iGeomField]->AppendNull());
1274 : }
1275 : }
1276 :
// Null geometry: nothing else to encode.
1277 3572 : if (poGeom == nullptr)
1278 : {
1279 3831 : if (m_aeGeomEncoding[iGeomField] ==
1280 1285 : OGRArrowGeomEncoding::GEOARROW_FSL_POINT &&
1281 1285 : GetDriverUCName() == "PARQUET")
1282 : {
1283 : // For some reason, Parquet doesn't support a NULL FixedSizeList
1284 : // on reading
1285 4 : auto poPointBuilder =
1286 : static_cast<arrow::FixedSizeListBuilder *>(poBuilder);
1287 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1288 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1289 4 : poPointBuilder->value_builder());
1290 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1291 : std::numeric_limits<double>::quiet_NaN()));
1292 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1293 : std::numeric_limits<double>::quiet_NaN()));
1294 4 : if (bHasZ)
1295 2 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1296 : std::numeric_limits<double>::quiet_NaN()));
1297 4 : if (bHasM)
1298 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1299 : std::numeric_limits<double>::quiet_NaN()));
1300 : }
1301 : else
1302 : {
1303 1273 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1304 : }
1305 :
1306 1277 : return OGRERR_NONE;
1307 : }
1308 :
// A non-empty geometry must have exactly the column type; an empty one only
// needs the same flattened type. Otherwise a warning is emitted and a null is
// written instead.
// NOTE(review): this path always uses AppendNull(), including for a
// GEOARROW_FSL_POINT column written by the PARQUET driver, for which the
// null geometry branch above deliberately avoids a null FixedSizeList -
// confirm whether the same workaround is needed here.
// NOTE(review): the envelope, written geometry types and bounding box row
// have already been updated from the rejected geometry at this point.
1309 : // The following checks are only valid for GeoArrow encoding
1310 3277 : if (m_aeGeomEncoding[iGeomField] != OGRArrowGeomEncoding::WKB &&
1311 982 : m_aeGeomEncoding[iGeomField] != OGRArrowGeomEncoding::WKT)
1312 : {
1313 862 : if ((!bIsEmpty && eGType != eColumnGType) ||
1314 188 : (bIsEmpty && wkbFlatten(eGType) != wkbFlatten(eColumnGType)))
1315 : {
1316 6 : CPLError(CE_Warning, CPLE_AppDefined,
1317 : "Geometry of type %s found, whereas %s is expected. "
1318 : "Writing null geometry",
1319 : OGRGeometryTypeToName(eGType),
1320 : OGRGeometryTypeToName(eColumnGType));
1321 6 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1322 :
1323 6 : return OGRERR_NONE;
1324 : }
1325 : }
1326 :
// Encode the geometry. In the GEOARROW_FSL_* cases a single DoubleBuilder
// receives the X, Y, [Z], [M] values of each vertex one after the other. In
// the GEOARROW_STRUCT_* cases each component has its own child builder.
1327 2289 : switch (m_aeGeomEncoding[iGeomField])
1328 : {
1329 1313 : case OGRArrowGeomEncoding::WKB:
1330 : {
// If the geometry is measured but the column is not, a clone without M is
// written instead. The warning is only emitted the first time this happens
// (function-level static flag).
1331 0 : std::unique_ptr<OGRGeometry> poGeomModified;
1332 1313 : if (OGR_GT_HasM(eGType) && !OGR_GT_HasM(eColumnGType))
1333 : {
1334 : static bool bHasWarned = false;
1335 0 : if (!bHasWarned)
1336 : {
1337 0 : CPLError(CE_Warning, CPLE_AppDefined,
1338 : "Removing M component from geometry");
1339 0 : bHasWarned = true;
1340 : }
1341 0 : poGeomModified.reset(poGeom->clone());
1342 0 : poGeomModified->setMeasured(false);
1343 0 : poGeom = poGeomModified.get();
1344 : }
1345 1313 : FixupGeometryBeforeWriting(poGeom);
// The WKB is serialized into the reusable m_abyBuffer member. Its size is
// passed to Append() as an int, hence the INT_MAX limit.
1346 1313 : const auto nSize = poGeom->WkbSize();
1347 1313 : if (nSize < INT_MAX)
1348 : {
1349 1313 : m_abyBuffer.resize(nSize);
1350 1313 : poGeom->exportToWkb(wkbNDR, &m_abyBuffer[0], wkbVariantIso);
1351 1313 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1352 : static_cast<arrow::BinaryBuilder *>(poBuilder)->Append(
1353 : m_abyBuffer.data(),
1354 : static_cast<int>(m_abyBuffer.size())));
1355 : }
1356 : else
1357 : {
1358 0 : CPLError(CE_Warning, CPLE_AppDefined,
1359 : "Too big geometry. "
1360 : "Writing null geometry");
1361 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1362 : }
1363 1313 : break;
1364 : }
1365 :
1366 308 : case OGRArrowGeomEncoding::WKT:
1367 : {
// ISO WKT. A non-negative m_nWKTCoordinatePrecision selects the F format
// with that precision for all coordinate components.
1368 308 : OGRWktOptions options;
1369 308 : options.variant = wkbVariantIso;
1370 308 : if (m_nWKTCoordinatePrecision >= 0)
1371 : {
1372 0 : options.format = OGRWktFormat::F;
1373 0 : options.xyPrecision = m_nWKTCoordinatePrecision;
1374 0 : options.zPrecision = m_nWKTCoordinatePrecision;
1375 0 : options.mPrecision = m_nWKTCoordinatePrecision;
1376 : }
1377 308 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1378 : static_cast<arrow::StringBuilder *>(poBuilder)->Append(
1379 : poGeom->exportToWkt(options)));
1380 308 : break;
1381 : }
1382 :
1383 20 : case OGRArrowGeomEncoding::GEOARROW_FSL_POINT:
1384 : {
// An empty point is written with NaN for every coordinate.
1385 20 : const auto poPoint = poGeom->toPoint();
1386 20 : auto poPointBuilder =
1387 : static_cast<arrow::FixedSizeListBuilder *>(poBuilder);
1388 20 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1389 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1390 20 : poPointBuilder->value_builder());
1391 20 : if (bIsEmpty)
1392 : {
1393 8 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1394 : std::numeric_limits<double>::quiet_NaN()));
1395 8 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1396 : std::numeric_limits<double>::quiet_NaN()));
1397 8 : if (bHasZ)
1398 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1399 : std::numeric_limits<double>::quiet_NaN()));
1400 8 : if (bHasM)
1401 2 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1402 : std::numeric_limits<double>::quiet_NaN()));
1403 : }
1404 : else
1405 : {
1406 12 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1407 : poValueBuilder->Append(poPoint->getX()));
1408 12 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1409 : poValueBuilder->Append(poPoint->getY()));
1410 12 : if (bHasZ)
1411 6 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1412 : poValueBuilder->Append(poPoint->getZ()));
1413 12 : if (bHasM)
1414 2 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1415 : poValueBuilder->Append(poPoint->getM()));
1416 : }
1417 20 : break;
1418 : }
1419 :
// Helper macro for the GEOARROW_STRUCT_* cases: declares, in the current
// scope, poXBuilder, poYBuilder, poZBuilder and poMBuilder as the child
// builders of the coordinate struct builder. Z is at index 2 when present,
// and M follows X, Y and the optional Z. poZBuilder / poMBuilder are null
// when the column has no Z / M. The trailing empty do/while makes the macro
// invocation require a terminating semicolon.
1420 : #define GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder) \
1421 : auto poXBuilder = \
1422 : static_cast<arrow::DoubleBuilder *>(poPointBuilder->field_builder(0)); \
1423 : auto poYBuilder = \
1424 : static_cast<arrow::DoubleBuilder *>(poPointBuilder->field_builder(1)); \
1425 : int iSubField = 2; \
1426 : arrow::DoubleBuilder *poZBuilder = nullptr; \
1427 : if (bHasZ) \
1428 : { \
1429 : poZBuilder = static_cast<arrow::DoubleBuilder *>( \
1430 : poPointBuilder->field_builder(iSubField)); \
1431 : ++iSubField; \
1432 : } \
1433 : arrow::DoubleBuilder *poMBuilder = nullptr; \
1434 : if (bHasM) \
1435 : { \
1436 : poMBuilder = static_cast<arrow::DoubleBuilder *>( \
1437 : poPointBuilder->field_builder(iSubField)); \
1438 : } \
1439 : do \
1440 : { \
1441 : } while (0)
1442 :
1443 85 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT:
1444 : {
// Same as GEOARROW_FSL_POINT, with one child builder per component.
1445 85 : const auto poPoint = poGeom->toPoint();
1446 85 : auto poPointBuilder =
1447 : static_cast<arrow::StructBuilder *>(poBuilder);
1448 85 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1449 85 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1450 :
1451 85 : if (bIsEmpty)
1452 : {
1453 20 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poXBuilder->Append(
1454 : std::numeric_limits<double>::quiet_NaN()));
1455 20 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poYBuilder->Append(
1456 : std::numeric_limits<double>::quiet_NaN()));
1457 : }
1458 : else
1459 : {
1460 65 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1461 : poXBuilder->Append(poPoint->getX()));
1462 65 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1463 : poYBuilder->Append(poPoint->getY()));
1464 : }
1465 85 : if (poZBuilder)
1466 : {
1467 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poZBuilder->Append(
1468 : bIsEmpty ? std::numeric_limits<double>::quiet_NaN()
1469 : : poPoint->getZ()));
1470 : }
1471 85 : if (poMBuilder)
1472 : {
1473 4 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMBuilder->Append(
1474 : bIsEmpty ? std::numeric_limits<double>::quiet_NaN()
1475 : : poPoint->getM()));
1476 : }
1477 85 : break;
1478 : }
1479 :
1480 20 : case OGRArrowGeomEncoding::GEOARROW_FSL_LINESTRING:
1481 : {
// List of points: the same value builder is passed for every component
// so that GeoArrowLineBuilder() interleaves them.
1482 20 : const auto poLS = poGeom->toLineString();
1483 20 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1484 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1485 20 : poListBuilder->value_builder());
1486 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1487 20 : poPointBuilder->value_builder());
1488 :
1489 20 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1490 20 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1491 : poLS, poPointBuilder, poValueBuilder, poValueBuilder,
1492 : bHasZ ? poValueBuilder : nullptr,
1493 : bHasM ? poValueBuilder : nullptr));
1494 20 : break;
1495 : }
1496 :
1497 57 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING:
1498 : {
1499 57 : const auto poLS = poGeom->toLineString();
1500 57 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1501 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1502 57 : poListBuilder->value_builder());
1503 57 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1504 :
1505 57 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1506 57 : OGR_ARROW_PROPAGATE_OGRERR(
1507 : GeoArrowLineBuilder(poLS, poPointBuilder, poXBuilder,
1508 : poYBuilder, poZBuilder, poMBuilder));
1509 57 : break;
1510 : }
1511 :
1512 32 : case OGRArrowGeomEncoding::GEOARROW_FSL_POLYGON:
1513 : {
// List (polygon) of lists (rings) of points.
1514 32 : const auto poPolygon = poGeom->toPolygon();
1515 32 : auto poPolygonBuilder =
1516 : static_cast<arrow::ListBuilder *>(poBuilder);
1517 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1518 32 : poPolygonBuilder->value_builder());
1519 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1520 32 : poRingBuilder->value_builder());
1521 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1522 32 : poPointBuilder->value_builder());
1523 32 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolygonBuilder->Append());
1524 62 : for (const auto *poRing : *poPolygon)
1525 : {
1526 30 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1527 30 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1528 : poRing, poPointBuilder, poValueBuilder, poValueBuilder,
1529 : bHasZ ? poValueBuilder : nullptr,
1530 : bHasM ? poValueBuilder : nullptr));
1531 : }
1532 32 : break;
1533 : }
1534 :
1535 93 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON:
1536 : {
1537 93 : const auto poPolygon = poGeom->toPolygon();
1538 93 : auto poPolygonBuilder =
1539 : static_cast<arrow::ListBuilder *>(poBuilder);
1540 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1541 93 : poPolygonBuilder->value_builder());
1542 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1543 93 : poRingBuilder->value_builder());
1544 93 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1545 :
1546 93 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolygonBuilder->Append());
1547 178 : for (const auto *poRing : *poPolygon)
1548 : {
1549 85 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1550 85 : OGR_ARROW_PROPAGATE_OGRERR(
1551 : GeoArrowLineBuilder(poRing, poPointBuilder, poXBuilder,
1552 : poYBuilder, poZBuilder, poMBuilder));
1553 : }
1554 93 : break;
1555 : }
1556 :
1557 32 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOINT:
1558 : {
// List of points, written from the member points of the multipoint.
1559 32 : const auto poMultiPoint = poGeom->toMultiPoint();
1560 32 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1561 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1562 32 : poListBuilder->value_builder());
1563 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1564 32 : poPointBuilder->value_builder());
1565 32 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1566 88 : for (const auto *poPoint : *poMultiPoint)
1567 : {
1568 56 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1569 56 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1570 : poValueBuilder->Append(poPoint->getX()));
1571 56 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1572 : poValueBuilder->Append(poPoint->getY()));
1573 56 : if (bHasZ)
1574 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1575 : poValueBuilder->Append(poPoint->getZ()));
1576 56 : if (bHasM)
1577 18 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1578 : poValueBuilder->Append(poPoint->getM()));
1579 : }
1580 32 : break;
1581 : }
1582 :
1583 81 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT:
1584 : {
1585 81 : const auto poMultiPoint = poGeom->toMultiPoint();
1586 81 : auto poListBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1587 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1588 81 : poListBuilder->value_builder());
1589 81 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1590 :
1591 81 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1592 198 : for (const auto *poPoint : *poMultiPoint)
1593 : {
1594 117 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPointBuilder->Append());
1595 117 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1596 : poXBuilder->Append(poPoint->getX()));
1597 117 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1598 : poYBuilder->Append(poPoint->getY()));
1599 117 : if (poZBuilder)
1600 58 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1601 : poZBuilder->Append(poPoint->getZ()));
1602 117 : if (poMBuilder)
1603 18 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1604 : poMBuilder->Append(poPoint->getM()));
1605 : }
1606 81 : break;
1607 : }
1608 :
1609 28 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTILINESTRING:
1610 : {
// List (multilinestring) of lists (linestrings) of points.
1611 28 : const auto poMLS = poGeom->toMultiLineString();
1612 28 : auto poMLSBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1613 : auto poLSBuilder = static_cast<arrow::ListBuilder *>(
1614 28 : poMLSBuilder->value_builder());
1615 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1616 28 : poLSBuilder->value_builder());
1617 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1618 28 : poPointBuilder->value_builder());
1619 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMLSBuilder->Append());
1620 60 : for (const auto *poLS : *poMLS)
1621 : {
1622 32 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poLSBuilder->Append());
1623 32 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1624 : poLS, poPointBuilder, poValueBuilder, poValueBuilder,
1625 : bHasZ ? poValueBuilder : nullptr,
1626 : bHasM ? poValueBuilder : nullptr));
1627 : }
1628 28 : break;
1629 : }
1630 :
1631 77 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING:
1632 : {
1633 77 : const auto poMLS = poGeom->toMultiLineString();
1634 77 : auto poMLSBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1635 : auto poLSBuilder = static_cast<arrow::ListBuilder *>(
1636 77 : poMLSBuilder->value_builder());
1637 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1638 77 : poLSBuilder->value_builder());
1639 77 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1640 :
1641 77 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMLSBuilder->Append());
1642 170 : for (const auto *poLS : *poMLS)
1643 : {
1644 93 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poLSBuilder->Append());
1645 93 : OGR_ARROW_PROPAGATE_OGRERR(
1646 : GeoArrowLineBuilder(poLS, poPointBuilder, poXBuilder,
1647 : poYBuilder, poZBuilder, poMBuilder));
1648 : }
1649 77 : break;
1650 : }
1651 :
1652 38 : case OGRArrowGeomEncoding::GEOARROW_FSL_MULTIPOLYGON:
1653 : {
// List (multipolygon) of lists (polygons) of lists (rings) of points.
1654 38 : const auto poMPoly = poGeom->toMultiPolygon();
1655 38 : auto poMPolyBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1656 : auto poPolyBuilder = static_cast<arrow::ListBuilder *>(
1657 38 : poMPolyBuilder->value_builder());
1658 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1659 38 : poPolyBuilder->value_builder());
1660 : auto poPointBuilder = static_cast<arrow::FixedSizeListBuilder *>(
1661 38 : poRingBuilder->value_builder());
1662 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1663 38 : poPointBuilder->value_builder());
1664 38 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMPolyBuilder->Append());
1665 82 : for (const auto *poPolygon : *poMPoly)
1666 : {
1667 44 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolyBuilder->Append());
1668 98 : for (const auto *poRing : *poPolygon)
1669 : {
1670 54 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1671 54 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1672 : poRing, poPointBuilder, poValueBuilder, poValueBuilder,
1673 : bHasZ ? poValueBuilder : nullptr,
1674 : bHasM ? poValueBuilder : nullptr));
1675 : }
1676 : }
1677 38 : break;
1678 : }
1679 :
1680 105 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON:
1681 : {
1682 105 : const auto poMPoly = poGeom->toMultiPolygon();
1683 105 : auto poMPolyBuilder = static_cast<arrow::ListBuilder *>(poBuilder);
1684 : auto poPolyBuilder = static_cast<arrow::ListBuilder *>(
1685 105 : poMPolyBuilder->value_builder());
1686 : auto poRingBuilder = static_cast<arrow::ListBuilder *>(
1687 105 : poPolyBuilder->value_builder());
1688 : auto poPointBuilder = static_cast<arrow::StructBuilder *>(
1689 105 : poRingBuilder->value_builder());
1690 105 : GET_XYZM_STRUCT_FIELD_BUILDERS_FROM(poPointBuilder);
1691 :
1692 105 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poMPolyBuilder->Append());
1693 222 : for (const auto *poPolygon : *poMPoly)
1694 : {
1695 117 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poPolyBuilder->Append());
1696 262 : for (const auto *poRing : *poPolygon)
1697 : {
1698 145 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poRingBuilder->Append());
1699 145 : OGR_ARROW_PROPAGATE_OGRERR(GeoArrowLineBuilder(
1700 : poRing, poPointBuilder, poXBuilder, poYBuilder,
1701 : poZBuilder, poMBuilder));
1702 : }
1703 : }
1704 105 : break;
1705 : }
1706 :
// Generic encodings are not expected in m_aeGeomEncoding at this point.
1707 0 : case OGRArrowGeomEncoding::GEOARROW_FSL_GENERIC:
1708 : case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC:
1709 : {
1710 0 : CPLAssert(false);
1711 : break;
1712 : }
1713 : }
1714 :
1715 2289 : return OGRERR_NONE;
1716 : }
1717 :
1718 : /************************************************************************/
1719 : /* ICreateFeature() */
1720 : /************************************************************************/
1721 :
1722 3113 : inline OGRErr OGRArrowWriterLayer::ICreateFeature(OGRFeature *poFeature)
1723 : {
1724 3113 : if (m_poSchema == nullptr)
1725 : {
1726 227 : CreateSchema();
1727 : }
1728 :
1729 3113 : if (m_apoBuilders.empty())
1730 : {
1731 264 : if (!m_apoFieldsFromArrowSchema.empty())
1732 : {
1733 0 : CPLError(CE_Failure, CPLE_NotSupported,
1734 : "ICreateFeature() cannot be used after "
1735 : "CreateFieldFromArrowSchema()");
1736 0 : return OGRERR_FAILURE;
1737 : }
1738 264 : CreateArrayBuilders();
1739 : }
1740 :
1741 : // First pass to check not-null constraints as Arrow doesn't seem
1742 : // to do that on the writing side. But such files can't be read.
1743 3113 : const int nFieldCount = m_poFeatureDefn->GetFieldCount();
1744 8268 : for (int i = 0; i < nFieldCount; ++i)
1745 : {
1746 5156 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
1747 5158 : if (!poFieldDefn->IsNullable() &&
1748 2 : !poFeature->IsFieldSetAndNotNullUnsafe(i))
1749 : {
1750 1 : CPLError(CE_Failure, CPLE_AppDefined,
1751 : "Null value found in non-nullable field %s",
1752 : poFieldDefn->GetNameRef());
1753 1 : return OGRERR_FAILURE;
1754 : }
1755 : }
1756 :
1757 3112 : const int nGeomFieldCount = m_poFeatureDefn->GetGeomFieldCount();
1758 6335 : for (int i = 0; i < nGeomFieldCount; ++i)
1759 : {
1760 3223 : const auto poGeomFieldDefn = m_poFeatureDefn->GetGeomFieldDefn(i);
1761 3255 : if (!poGeomFieldDefn->IsNullable() &&
1762 32 : poFeature->GetGeomFieldRef(i) == nullptr)
1763 : {
1764 0 : CPLError(CE_Failure, CPLE_AppDefined,
1765 : "Null value found in non-nullable geometry field %s",
1766 : poGeomFieldDefn->GetNameRef());
1767 0 : return OGRERR_FAILURE;
1768 : }
1769 : }
1770 :
1771 : // Write FID, if FID column present
1772 3112 : int nArrowIdx = 0;
1773 3112 : if (!m_osFIDColumn.empty())
1774 : {
1775 2258 : int64_t nFID = poFeature->GetFID();
1776 2258 : if (nFID == OGRNullFID)
1777 : {
1778 36 : nFID = m_nFeatureCount;
1779 36 : poFeature->SetFID(nFID);
1780 : }
1781 : auto poBuilder =
1782 2258 : static_cast<arrow::Int64Builder *>(m_apoBuilders[0].get());
1783 2258 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->Append(nFID));
1784 2258 : nArrowIdx++;
1785 : }
1786 :
1787 : // Write attributes
1788 8267 : for (int i = 0; i < nFieldCount; ++i, ++nArrowIdx)
1789 : {
1790 5155 : auto poBuilder = m_apoBuilders[nArrowIdx].get();
1791 5155 : if (!poFeature->IsFieldSetAndNotNullUnsafe(i))
1792 : {
1793 966 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1794 966 : continue;
1795 : }
1796 :
1797 4189 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i);
1798 4189 : const auto eSubDT = poFieldDefn->GetSubType();
1799 4189 : switch (poFieldDefn->GetType())
1800 : {
1801 2453 : case OFTInteger:
1802 2453 : if (eSubDT == OFSTBoolean)
1803 16 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1804 : static_cast<arrow::BooleanBuilder *>(poBuilder)->Append(
1805 : poFeature->GetFieldAsIntegerUnsafe(i) != 0));
1806 2437 : else if (eSubDT == OFSTInt16)
1807 16 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1808 : static_cast<arrow::Int16Builder *>(poBuilder)->Append(
1809 : static_cast<int16_t>(
1810 : poFeature->GetFieldAsIntegerUnsafe(i))));
1811 : else
1812 2421 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1813 : static_cast<arrow::Int32Builder *>(poBuilder)->Append(
1814 : poFeature->GetFieldAsIntegerUnsafe(i)));
1815 2453 : break;
1816 :
1817 128 : case OFTInteger64:
1818 128 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1819 : static_cast<arrow::Int64Builder *>(poBuilder)->Append(
1820 : static_cast<int64_t>(
1821 : poFeature->GetFieldAsInteger64Unsafe(i))));
1822 128 : break;
1823 :
1824 201 : case OFTReal:
1825 : {
1826 201 : const auto arrowType = m_poSchema->fields()[nArrowIdx]->type();
1827 201 : const double dfVal = poFeature->GetFieldAsDoubleUnsafe(i);
1828 201 : if (arrowType->id() == arrow::Type::DECIMAL128)
1829 : {
1830 : auto res = arrow::Decimal128::FromReal(
1831 : dfVal, poFieldDefn->GetWidth(),
1832 32 : poFieldDefn->GetPrecision());
1833 32 : if (res.ok())
1834 : {
1835 32 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1836 : static_cast<arrow::Decimal128Builder *>(poBuilder)
1837 : ->Append(*res));
1838 : }
1839 : else
1840 : {
1841 0 : CPLError(CE_Warning, CPLE_AppDefined,
1842 : "Cannot parse %.18g as a %d.%d decimal", dfVal,
1843 : poFieldDefn->GetWidth(),
1844 : poFieldDefn->GetPrecision());
1845 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1846 : }
1847 : }
1848 169 : else if (arrowType->id() == arrow::Type::DECIMAL256)
1849 : {
1850 : auto res = arrow::Decimal256::FromReal(
1851 : dfVal, poFieldDefn->GetWidth(),
1852 0 : poFieldDefn->GetPrecision());
1853 0 : if (res.ok())
1854 : {
1855 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1856 : static_cast<arrow::Decimal256Builder *>(poBuilder)
1857 : ->Append(*res));
1858 : }
1859 : else
1860 : {
1861 0 : CPLError(CE_Warning, CPLE_AppDefined,
1862 : "Cannot parse %.18g as a %d.%d decimal", dfVal,
1863 : poFieldDefn->GetWidth(),
1864 : poFieldDefn->GetPrecision());
1865 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1866 : }
1867 : }
1868 169 : else if (eSubDT == OFSTFloat32)
1869 : {
1870 28 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1871 : static_cast<arrow::FloatBuilder *>(poBuilder)->Append(
1872 : static_cast<float>(dfVal)));
1873 : }
1874 : else
1875 : {
1876 141 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1877 : static_cast<arrow::DoubleBuilder *>(poBuilder)->Append(
1878 : dfVal));
1879 : }
1880 201 : break;
1881 : }
1882 :
1883 499 : case OFTString:
1884 : case OFTWideString:
1885 499 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1886 : static_cast<arrow::StringBuilder *>(poBuilder)->Append(
1887 : poFeature->GetFieldAsStringUnsafe(i)));
1888 499 : break;
1889 :
1890 62 : case OFTBinary:
1891 : {
1892 62 : int nSize = 0;
1893 62 : const auto pData = poFeature->GetFieldAsBinary(i, &nSize);
1894 62 : if (poFieldDefn->GetWidth() != 0)
1895 : {
1896 20 : if (poFieldDefn->GetWidth() != nSize)
1897 : {
1898 0 : CPLError(
1899 : CE_Warning, CPLE_AppDefined,
1900 : "Cannot write field %s. Got %d bytes, expected %d",
1901 : poFieldDefn->GetNameRef(), nSize,
1902 : poFieldDefn->GetWidth());
1903 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poBuilder->AppendNull());
1904 : }
1905 : else
1906 : {
1907 20 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1908 : static_cast<arrow::FixedSizeBinaryBuilder *>(
1909 : poBuilder)
1910 : ->Append(pData));
1911 : }
1912 : }
1913 : else
1914 42 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1915 : static_cast<arrow::BinaryBuilder *>(poBuilder)->Append(
1916 : pData, nSize));
1917 62 : break;
1918 : }
1919 :
1920 216 : case OFTIntegerList:
1921 : {
1922 216 : auto poListBuilder =
1923 : static_cast<arrow::ListBuilder *>(poBuilder);
1924 216 : if (eSubDT == OFSTBoolean)
1925 : {
1926 36 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1927 : auto poValueBuilder = static_cast<arrow::BooleanBuilder *>(
1928 36 : poListBuilder->value_builder());
1929 36 : int nValues = 0;
1930 : const auto panValues =
1931 36 : poFeature->GetFieldAsIntegerList(i, &nValues);
1932 108 : for (int j = 0; j < nValues; ++j)
1933 72 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1934 : poValueBuilder->Append(panValues[j] != 0));
1935 : }
1936 180 : else if (eSubDT == OFSTInt16)
1937 : {
1938 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1939 : auto poValueBuilder = static_cast<arrow::Int16Builder *>(
1940 0 : poListBuilder->value_builder());
1941 0 : int nValues = 0;
1942 : const auto panValues =
1943 0 : poFeature->GetFieldAsIntegerList(i, &nValues);
1944 0 : for (int j = 0; j < nValues; ++j)
1945 0 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1946 : static_cast<int16_t>(panValues[j])));
1947 : }
1948 : else
1949 : {
1950 180 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1951 : auto poValueBuilder = static_cast<arrow::Int32Builder *>(
1952 180 : poListBuilder->value_builder());
1953 180 : int nValues = 0;
1954 : const auto panValues =
1955 180 : poFeature->GetFieldAsIntegerList(i, &nValues);
1956 540 : for (int j = 0; j < nValues; ++j)
1957 360 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
1958 : poValueBuilder->Append(panValues[j]));
1959 : }
1960 216 : break;
1961 : }
1962 :
1963 92 : case OFTInteger64List:
1964 : {
1965 92 : auto poListBuilder =
1966 : static_cast<arrow::ListBuilder *>(poBuilder);
1967 92 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1968 : auto poValueBuilder = static_cast<arrow::Int64Builder *>(
1969 92 : poListBuilder->value_builder());
1970 92 : int nValues = 0;
1971 : const auto panValues =
1972 92 : poFeature->GetFieldAsInteger64List(i, &nValues);
1973 292 : for (int j = 0; j < nValues; ++j)
1974 200 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1975 : static_cast<int64_t>(panValues[j])));
1976 92 : break;
1977 : }
1978 :
1979 152 : case OFTRealList:
1980 : {
1981 152 : auto poListBuilder =
1982 : static_cast<arrow::ListBuilder *>(poBuilder);
1983 152 : if (eSubDT == OFSTFloat32)
1984 : {
1985 48 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1986 : auto poValueBuilder = static_cast<arrow::FloatBuilder *>(
1987 48 : poListBuilder->value_builder());
1988 48 : int nValues = 0;
1989 : const auto padfValues =
1990 48 : poFeature->GetFieldAsDoubleList(i, &nValues);
1991 144 : for (int j = 0; j < nValues; ++j)
1992 96 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poValueBuilder->Append(
1993 : static_cast<float>(padfValues[j])));
1994 : }
1995 : else
1996 : {
1997 104 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
1998 : auto poValueBuilder = static_cast<arrow::DoubleBuilder *>(
1999 104 : poListBuilder->value_builder());
2000 104 : int nValues = 0;
2001 : const auto padfValues =
2002 104 : poFeature->GetFieldAsDoubleList(i, &nValues);
2003 280 : for (int j = 0; j < nValues; ++j)
2004 176 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2005 : poValueBuilder->Append(padfValues[j]));
2006 : }
2007 152 : break;
2008 : }
2009 :
2010 52 : case OFTStringList:
2011 : case OFTWideStringList:
2012 : {
2013 52 : auto poListBuilder =
2014 : static_cast<arrow::ListBuilder *>(poBuilder);
2015 52 : OGR_ARROW_RETURN_OGRERR_NOT_OK(poListBuilder->Append());
2016 : auto poValueBuilder = static_cast<arrow::StringBuilder *>(
2017 52 : poListBuilder->value_builder());
2018 52 : const auto papszValues = poFeature->GetFieldAsStringList(i);
2019 132 : for (int j = 0; papszValues && papszValues[j]; ++j)
2020 80 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2021 : poValueBuilder->Append(papszValues[j]));
2022 52 : break;
2023 : }
2024 :
2025 109 : case OFTDate:
2026 : {
2027 : int nYear, nMonth, nDay, nHour, nMinute;
2028 : float fSec;
2029 : int nTZFlag;
2030 109 : poFeature->GetFieldAsDateTime(i, &nYear, &nMonth, &nDay, &nHour,
2031 : &nMinute, &fSec, &nTZFlag);
2032 : struct tm brokenDown;
2033 109 : memset(&brokenDown, 0, sizeof(brokenDown));
2034 109 : brokenDown.tm_year = nYear - 1900;
2035 109 : brokenDown.tm_mon = nMonth - 1;
2036 109 : brokenDown.tm_mday = nDay;
2037 109 : GIntBig nVal = CPLYMDHMSToUnixTime(&brokenDown);
2038 109 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2039 : static_cast<arrow::Date32Builder *>(poBuilder)->Append(
2040 : static_cast<int>(nVal / 86400)));
2041 109 : break;
2042 : }
2043 :
2044 36 : case OFTTime:
2045 : {
2046 : int nYear, nMonth, nDay, nHour, nMinute;
2047 : float fSec;
2048 : int nTZFlag;
2049 36 : poFeature->GetFieldAsDateTime(i, &nYear, &nMonth, &nDay, &nHour,
2050 : &nMinute, &fSec, &nTZFlag);
2051 36 : int nVal = nHour * 3600 + nMinute * 60;
2052 36 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2053 : static_cast<arrow::Time32Builder *>(poBuilder)->Append(
2054 : static_cast<int>(
2055 : (static_cast<double>(nVal) + fSec) * 1000 + 0.5)));
2056 36 : break;
2057 : }
2058 :
2059 189 : case OFTDateTime:
2060 : {
2061 : int nYear, nMonth, nDay, nHour, nMinute;
2062 : float fSec;
2063 : int nTZFlag;
2064 189 : poFeature->GetFieldAsDateTime(i, &nYear, &nMonth, &nDay, &nHour,
2065 : &nMinute, &fSec, &nTZFlag);
2066 : struct tm brokenDown;
2067 189 : memset(&brokenDown, 0, sizeof(brokenDown));
2068 189 : brokenDown.tm_year = nYear - 1900;
2069 189 : brokenDown.tm_mon = nMonth - 1;
2070 189 : brokenDown.tm_mday = nDay;
2071 189 : brokenDown.tm_hour = nHour;
2072 189 : brokenDown.tm_min = nMinute;
2073 189 : brokenDown.tm_sec = 0;
2074 189 : GIntBig nVal = CPLYMDHMSToUnixTime(&brokenDown);
2075 306 : if (!IsFileWriterCreated() &&
2076 117 : m_anTZFlag[i] != OGR_TZFLAG_UNKNOWN)
2077 : {
2078 59 : if (m_anTZFlag[i] == TZFLAG_UNINITIALIZED)
2079 35 : m_anTZFlag[i] = nTZFlag;
2080 24 : else if (m_anTZFlag[i] != nTZFlag)
2081 : {
2082 0 : if (m_anTZFlag[i] >= OGR_TZFLAG_MIXED_TZ &&
2083 0 : nTZFlag >= OGR_TZFLAG_MIXED_TZ)
2084 : {
2085 0 : m_anTZFlag[i] =
2086 : OGR_TZFLAG_MIXED_TZ; // harmonize on UTC ultimately
2087 : }
2088 : else
2089 : {
2090 0 : CPLError(CE_Warning, CPLE_AppDefined,
2091 : "Field %s contains a mix of "
2092 : "timezone-aware and local/without "
2093 : "timezone values.",
2094 : poFieldDefn->GetNameRef());
2095 0 : m_anTZFlag[i] = OGR_TZFLAG_UNKNOWN;
2096 : }
2097 : }
2098 : }
2099 189 : if (nTZFlag > OGR_TZFLAG_MIXED_TZ)
2100 : {
2101 60 : const int nOffsetSec = (nTZFlag - OGR_TZFLAG_UTC) * 15 * 60;
2102 60 : nVal -= nOffsetSec;
2103 : }
2104 189 : OGR_ARROW_RETURN_OGRERR_NOT_OK(
2105 : static_cast<arrow::TimestampBuilder *>(poBuilder)->Append(
2106 : static_cast<int64_t>(
2107 : (static_cast<double>(nVal) + fSec) * 1000 + 0.5)));
2108 189 : break;
2109 : }
2110 : }
2111 : }
2112 :
2113 : // Write geometries
2114 6335 : for (int i = 0; i < nGeomFieldCount; ++i, ++nArrowIdx)
2115 : {
2116 3223 : auto poBuilder = m_apoBuilders[nArrowIdx].get();
2117 3223 : OGRGeometry *poGeom = poFeature->GetGeomFieldRef(i);
2118 3223 : if (BuildGeometry(poGeom, i, poBuilder) != OGRERR_NONE)
2119 0 : return OGRERR_FAILURE;
2120 : }
2121 :
2122 3112 : m_nFeatureCount++;
2123 :
2124 : // Flush the current row group if reaching the limit of rows per group.
2125 3112 : if (!m_apoBuilders.empty() && m_apoBuilders[0]->length() == m_nRowGroupSize)
2126 : {
2127 22 : if (!FlushFeatures())
2128 0 : return OGRERR_FAILURE;
2129 : }
2130 :
2131 3112 : return OGRERR_NONE;
2132 : }
2133 :
2134 : /************************************************************************/
2135 : /* FlushFeatures() */
2136 : /************************************************************************/
2137 :
2138 38 : inline bool OGRArrowWriterLayer::FlushFeatures()
2139 : {
2140 38 : if (m_apoBuilders.empty() || m_apoBuilders[0]->length() == 0)
2141 0 : return true;
2142 :
2143 38 : if (!IsFileWriterCreated())
2144 : {
2145 8 : CreateWriter();
2146 8 : if (!IsFileWriterCreated())
2147 0 : return false;
2148 : }
2149 :
2150 38 : return FlushGroup();
2151 : }
2152 :
2153 : /************************************************************************/
2154 : /* GetFeatureCount() */
2155 : /************************************************************************/
2156 :
2157 1 : inline GIntBig OGRArrowWriterLayer::GetFeatureCount(int bForce)
2158 : {
2159 1 : if (m_poAttrQuery == nullptr && m_poFilterGeom == nullptr)
2160 : {
2161 1 : return m_nFeatureCount;
2162 : }
2163 0 : return OGRLayer::GetFeatureCount(bForce);
2164 : }
2165 :
2166 : /************************************************************************/
2167 : /* TestCapability() */
2168 : /************************************************************************/
2169 :
2170 626 : inline int OGRArrowWriterLayer::TestCapability(const char *pszCap)
2171 : {
2172 626 : if (EQUAL(pszCap, OLCCreateField) || EQUAL(pszCap, OLCCreateGeomField))
2173 27 : return m_poSchema == nullptr;
2174 :
2175 599 : if (EQUAL(pszCap, OLCSequentialWrite))
2176 24 : return true;
2177 :
2178 575 : if (EQUAL(pszCap, OLCFastWriteArrowBatch))
2179 0 : return true;
2180 :
2181 575 : if (EQUAL(pszCap, OLCStringsAsUTF8))
2182 1 : return true;
2183 :
2184 574 : if (EQUAL(pszCap, OLCMeasuredGeometries))
2185 227 : return true;
2186 :
2187 347 : return false;
2188 : }
2189 :
2190 : /************************************************************************/
2191 : /* WriteArrays() */
2192 : /************************************************************************/
2193 :
2194 266 : inline bool OGRArrowWriterLayer::WriteArrays(
2195 : std::function<bool(const std::shared_ptr<arrow::Field> &,
2196 : const std::shared_ptr<arrow::Array> &)>
2197 : postProcessArray)
2198 : {
2199 266 : int nArrowIdx = 0;
2200 266 : int nArrowIdxFirstField = !m_osFIDColumn.empty() ? 1 : 0;
2201 1921 : for (const auto &poBuilder : m_apoBuilders)
2202 : {
2203 1655 : const auto &field = m_poSchema->fields()[nArrowIdx];
2204 :
2205 0 : std::shared_ptr<arrow::Array> array;
2206 1655 : auto status = poBuilder->Finish(&array);
2207 1655 : if (!status.ok())
2208 : {
2209 0 : CPLError(CE_Failure, CPLE_AppDefined,
2210 : "builder::Finish() for field %s failed with %s",
2211 0 : field->name().c_str(), status.message().c_str());
2212 0 : return false;
2213 : }
2214 :
2215 : // CPLDebug("ARROW", "%s", array->ToString().c_str());
2216 :
2217 1655 : const int iCol = nArrowIdx - nArrowIdxFirstField;
2218 1655 : if (iCol >= 0 && iCol < m_poFeatureDefn->GetFieldCount())
2219 : {
2220 1342 : const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(iCol);
2221 1342 : const auto eFieldType = poFieldDefn->GetType();
2222 1342 : if (eFieldType == OFTInteger || eFieldType == OFTInteger64)
2223 : {
2224 219 : const auto &osDomainName = poFieldDefn->GetDomainName();
2225 : const auto oIter =
2226 219 : m_oMapFieldDomainToStringArray.find(osDomainName);
2227 219 : if (oIter != m_oMapFieldDomainToStringArray.end())
2228 : {
2229 : auto result = arrow::DictionaryArray::FromArrays(
2230 12 : field->type(), array, oIter->second);
2231 12 : if (!result.ok())
2232 : {
2233 0 : CPLError(CE_Failure, CPLE_AppDefined,
2234 : "DictionaryArray::FromArrays() for field %s "
2235 : "failed with %s",
2236 0 : field->name().c_str(),
2237 0 : result.status().message().c_str());
2238 0 : return false;
2239 : }
2240 12 : array = *result;
2241 : }
2242 : }
2243 : }
2244 :
2245 1655 : if (!postProcessArray(field, array))
2246 : {
2247 0 : return false;
2248 : }
2249 :
2250 1655 : nArrowIdx++;
2251 : }
2252 :
2253 266 : if (m_bWriteBBoxStruct)
2254 : {
2255 176 : const int nGeomFieldCount = m_poFeatureDefn->GetGeomFieldCount();
2256 356 : for (int i = 0; i < nGeomFieldCount; ++i)
2257 : {
2258 180 : const auto &field = m_apoFieldsBBOX[i];
2259 0 : std::shared_ptr<arrow::Array> array;
2260 180 : auto status = m_apoBuildersBBOXStruct[i]->Finish(&array);
2261 180 : if (!status.ok())
2262 : {
2263 0 : CPLError(CE_Failure, CPLE_AppDefined,
2264 : "builder::Finish() for field %s failed with %s",
2265 0 : field->name().c_str(), status.message().c_str());
2266 0 : return false;
2267 : }
2268 :
2269 180 : if (!postProcessArray(field, array))
2270 : {
2271 0 : return false;
2272 : }
2273 : }
2274 : }
2275 :
2276 266 : return true;
2277 : }
2278 :
2279 : /************************************************************************/
2280 : /* TestBit() */
2281 : /************************************************************************/
2282 :
/** Return whether bit nIdx is set in a packed bitmap.
 *
 * Bits are numbered from the least significant bit of each byte: bit 0 is
 * the lowest bit of pabyData[0], bit 8 the lowest bit of pabyData[1], etc.
 *
 * @param pabyData bitmap. Must hold at least nIdx / 8 + 1 bytes.
 * @param nIdx     zero-based index of the bit to test.
 */
static inline bool TestBit(const uint8_t *pabyData, size_t nIdx)
{
    const uint8_t nByte = pabyData[nIdx >> 3];
    const unsigned nBitInByte = static_cast<unsigned>(nIdx & 7);
    return ((nByte >> nBitInByte) & 1U) != 0;
}
2287 :
2288 : /************************************************************************/
2289 : /* WriteArrowBatchInternal() */
2290 : /************************************************************************/
2291 :
2292 126 : inline bool OGRArrowWriterLayer::WriteArrowBatchInternal(
2293 : const struct ArrowSchema *schema, struct ArrowArray *array,
2294 : CSLConstList papszOptions,
2295 : std::function<bool(const std::shared_ptr<arrow::RecordBatch> &)> writeBatch)
2296 : {
2297 126 : if (m_poSchema == nullptr)
2298 : {
2299 121 : CreateSchema();
2300 : }
2301 :
2302 126 : if (!IsFileWriterCreated())
2303 : {
2304 121 : CreateWriter();
2305 121 : if (!IsFileWriterCreated())
2306 0 : return false;
2307 : }
2308 :
2309 126 : if (m_apoBuilders.empty())
2310 : {
2311 121 : CreateArrayBuilders();
2312 : }
2313 :
2314 126 : const int nGeomFieldCount = m_poFeatureDefn->GetGeomFieldCount();
2315 126 : const int nGeomFieldCountBBoxFields =
2316 126 : m_bWriteBBoxStruct ? nGeomFieldCount : 0;
2317 :
2318 126 : const char *pszFIDName = CSLFetchNameValueDef(
2319 : papszOptions, "FID", OGRLayer::DEFAULT_ARROW_FID_NAME);
2320 : const char *pszSingleGeomFieldName =
2321 126 : CSLFetchNameValue(papszOptions, "GEOMETRY_NAME");
2322 :
2323 : // Sort schema and array children in the same order as m_poSchema.
2324 : // This is needed for non-WKB geometry encoding
2325 252 : std::map<std::string, int> oMapSchemaChildrenNameToIdx;
2326 1644 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
2327 : {
2328 1518 : if (cpl::contains(oMapSchemaChildrenNameToIdx,
2329 1518 : schema->children[i]->name))
2330 : {
2331 0 : CPLError(CE_Failure, CPLE_AppDefined,
2332 : "Several fields with same name '%s' found",
2333 0 : schema->children[i]->name);
2334 0 : return false;
2335 : }
2336 1518 : oMapSchemaChildrenNameToIdx[schema->children[i]->name] = i;
2337 :
2338 1518 : if (!pszSingleGeomFieldName && schema->children[i]->metadata)
2339 : {
2340 : const auto oMetadata =
2341 256 : OGRParseArrowMetadata(schema->children[i]->metadata);
2342 128 : const auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
2343 262 : if (oIter != oMetadata.end() &&
2344 134 : (oIter->second == EXTENSION_NAME_OGC_WKB ||
2345 8 : oIter->second == EXTENSION_NAME_GEOARROW_WKB))
2346 : {
2347 126 : pszSingleGeomFieldName = schema->children[i]->name;
2348 : }
2349 : }
2350 : }
2351 126 : if (!pszSingleGeomFieldName)
2352 0 : pszSingleGeomFieldName = OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME;
2353 :
2354 126 : std::vector<int> anMapLayerSchemaToArraySchema(m_poSchema->num_fields(),
2355 378 : -1);
2356 : struct ArrowArray fidArray;
2357 : struct ArrowSchema fidSchema;
2358 126 : memset(&fidArray, 0, sizeof(fidArray));
2359 126 : memset(&fidSchema, 0, sizeof(fidSchema));
2360 252 : std::vector<void *> apBuffersFid;
2361 252 : std::vector<int64_t> fids;
2362 :
2363 252 : std::set<int> oSetReferencedFieldsInArraySchema;
2364 0 : const auto DummyFreeArray = [](struct ArrowArray *ptrArray)
2365 0 : { ptrArray->release = nullptr; };
2366 126 : const auto DummyFreeSchema = [](struct ArrowSchema *ptrSchema)
2367 126 : { ptrSchema->release = nullptr; };
2368 126 : bool bRebuildBatch = false;
2369 1642 : for (int i = 0; i < m_poSchema->num_fields() - nGeomFieldCountBBoxFields;
2370 : ++i)
2371 : {
2372 : auto oIter =
2373 1516 : oMapSchemaChildrenNameToIdx.find(m_poSchema->field(i)->name());
2374 1516 : if (oIter == oMapSchemaChildrenNameToIdx.end())
2375 : {
2376 4 : if (m_poSchema->field(i)->name() == m_osFIDColumn)
2377 : {
2378 1 : oIter = oMapSchemaChildrenNameToIdx.find(pszFIDName);
2379 1 : if (oIter == oMapSchemaChildrenNameToIdx.end())
2380 : {
2381 : // If the input data does not contain a FID column, but
2382 : // the output file requires it, creates a default FID column
2383 0 : fidArray.release = DummyFreeArray;
2384 0 : fidArray.n_buffers = 2;
2385 0 : apBuffersFid.resize(2);
2386 0 : fidArray.buffers =
2387 0 : const_cast<const void **>(apBuffersFid.data());
2388 0 : fids.reserve(static_cast<size_t>(array->length));
2389 0 : for (size_t iRow = 0;
2390 0 : iRow < static_cast<size_t>(array->length); ++iRow)
2391 0 : fids.push_back(m_nFeatureCount + iRow);
2392 0 : fidArray.buffers[1] = fids.data();
2393 0 : fidArray.length = array->length;
2394 0 : fidSchema.release = DummyFreeSchema;
2395 0 : fidSchema.name = m_osFIDColumn.c_str();
2396 0 : fidSchema.format = "l"; // int64
2397 0 : continue;
2398 : }
2399 : }
2400 6 : else if (nGeomFieldCount == 1 &&
2401 3 : m_poFeatureDefn->GetGeomFieldIndex(
2402 3 : m_poSchema->field(i)->name().c_str()) == 0)
2403 : {
2404 : oIter =
2405 3 : oMapSchemaChildrenNameToIdx.find(pszSingleGeomFieldName);
2406 3 : if (oIter != oMapSchemaChildrenNameToIdx.end())
2407 3 : bRebuildBatch = true;
2408 : }
2409 :
2410 4 : if (oIter == oMapSchemaChildrenNameToIdx.end())
2411 : {
2412 0 : CPLError(CE_Failure, CPLE_AppDefined,
2413 : "Cannot find field '%s' in schema",
2414 0 : m_poSchema->field(i)->name().c_str());
2415 0 : return false;
2416 : }
2417 : }
2418 1516 : anMapLayerSchemaToArraySchema[i] = oIter->second;
2419 1516 : oSetReferencedFieldsInArraySchema.insert(oIter->second);
2420 : }
2421 :
2422 : // Note: we cheat a bit by declaring a single instance of the minx/miny/
2423 : // maxx/maxy sub-field ArrowSchema*, and make all struct ArrowSchema point
2424 : // to them. That's OK because we use DummyFreeSchema to release, which does
2425 : // nothing.
2426 : struct ArrowSchema bboxStructSchemaXMin;
2427 : struct ArrowSchema bboxStructSchemaYMin;
2428 : struct ArrowSchema bboxStructSchemaXMax;
2429 : struct ArrowSchema bboxStructSchemaYMax;
2430 126 : constexpr int BBOX_SUBFIELD_COUNT = 4;
2431 : std::array<struct ArrowSchema *, BBOX_SUBFIELD_COUNT>
2432 : bboxStructSchemaChildren;
2433 126 : constexpr int BBOX_STRUCT_BUFFER_COUNT = 1; // validity bitmap array
2434 : // cppcheck-suppress constStatement
2435 : std::vector<std::array<const void *, BBOX_STRUCT_BUFFER_COUNT>>
2436 252 : bboxStructBuffersPtr;
2437 252 : std::vector<std::vector<GByte>> aabyBboxStructValidity;
2438 252 : std::vector<std::vector<float>> aadfMinX, aadfMinY, aadfMaxX, aadfMaxY;
2439 : // cppcheck-suppress constStatement
2440 252 : std::vector<std::array<struct ArrowArray, BBOX_SUBFIELD_COUNT>> bboxArrays;
2441 : // cppcheck-suppress constStatement
2442 : std::vector<std::array<struct ArrowArray *, BBOX_SUBFIELD_COUNT>>
2443 252 : bboxArraysPtr;
2444 126 : constexpr int BBOX_SUBFIELD_BUFFER_COUNT =
2445 : 2; // validity bitmap array and float array
2446 : std::vector<std::array<std::array<const void *, BBOX_SUBFIELD_BUFFER_COUNT>,
2447 : BBOX_SUBFIELD_COUNT>>
2448 252 : bboxBuffersPtr;
2449 :
2450 : // Temporary arrays to hold the geometry bounding boxes.
2451 252 : std::vector<struct ArrowArray> bboxStructArray;
2452 252 : std::vector<struct ArrowSchema> bboxStructSchema;
2453 :
2454 252 : std::vector<struct ArrowSchema *> newSchemaChildren;
2455 252 : std::vector<struct ArrowArray *> newArrayChildren;
2456 126 : newSchemaChildren.reserve(m_poSchema->num_fields());
2457 126 : newArrayChildren.reserve(m_poSchema->num_fields());
2458 1642 : for (int i = 0; i < m_poSchema->num_fields() - nGeomFieldCountBBoxFields;
2459 : ++i)
2460 : {
2461 1516 : if (anMapLayerSchemaToArraySchema[i] < 0)
2462 : {
2463 0 : CPLAssert(m_poSchema->field(i)->name() == m_osFIDColumn);
2464 0 : newSchemaChildren.emplace_back(&fidSchema);
2465 0 : newArrayChildren.emplace_back(&fidArray);
2466 : }
2467 : else
2468 : {
2469 : newSchemaChildren.emplace_back(
2470 1516 : schema->children[anMapLayerSchemaToArraySchema[i]]);
2471 : newArrayChildren.emplace_back(
2472 1516 : array->children[anMapLayerSchemaToArraySchema[i]]);
2473 : }
2474 : }
2475 :
2476 126 : if (m_bWriteBBoxStruct)
2477 : {
2478 14 : memset(&bboxStructSchemaXMin, 0, sizeof(bboxStructSchemaXMin));
2479 14 : memset(&bboxStructSchemaYMin, 0, sizeof(bboxStructSchemaYMin));
2480 14 : memset(&bboxStructSchemaXMax, 0, sizeof(bboxStructSchemaXMax));
2481 14 : memset(&bboxStructSchemaYMax, 0, sizeof(bboxStructSchemaYMax));
2482 :
2483 14 : bboxStructSchemaXMin.release = DummyFreeSchema;
2484 14 : bboxStructSchemaXMin.name = "xmin";
2485 14 : bboxStructSchemaXMin.format = "f"; // float32
2486 :
2487 14 : bboxStructSchemaYMin.release = DummyFreeSchema;
2488 14 : bboxStructSchemaYMin.name = "ymin";
2489 14 : bboxStructSchemaYMin.format = "f"; // float32
2490 :
2491 14 : bboxStructSchemaXMax.release = DummyFreeSchema;
2492 14 : bboxStructSchemaXMax.name = "xmax";
2493 14 : bboxStructSchemaXMax.format = "f"; // float32
2494 :
2495 14 : bboxStructSchemaYMax.release = DummyFreeSchema;
2496 14 : bboxStructSchemaYMax.name = "ymax";
2497 14 : bboxStructSchemaYMax.format = "f"; // float32
2498 :
2499 : try
2500 : {
2501 14 : constexpr int XMIN_IDX = 0;
2502 14 : constexpr int YMIN_IDX = 1;
2503 14 : constexpr int XMAX_IDX = 2;
2504 14 : constexpr int YMAX_IDX = 3;
2505 14 : bboxStructSchemaChildren[XMIN_IDX] = &bboxStructSchemaXMin;
2506 : // cppcheck-suppress objectIndex
2507 14 : bboxStructSchemaChildren[YMIN_IDX] = &bboxStructSchemaYMin;
2508 : // cppcheck-suppress objectIndex
2509 14 : bboxStructSchemaChildren[XMAX_IDX] = &bboxStructSchemaXMax;
2510 : // cppcheck-suppress objectIndex
2511 14 : bboxStructSchemaChildren[YMAX_IDX] = &bboxStructSchemaYMax;
2512 :
2513 14 : bboxStructArray.resize(nGeomFieldCount);
2514 14 : bboxStructSchema.resize(nGeomFieldCount);
2515 14 : bboxArrays.resize(nGeomFieldCount);
2516 14 : bboxArraysPtr.resize(nGeomFieldCount);
2517 14 : bboxBuffersPtr.resize(nGeomFieldCount);
2518 14 : bboxStructBuffersPtr.resize(nGeomFieldCount);
2519 14 : aabyBboxStructValidity.resize(nGeomFieldCount);
2520 28 : memset(bboxStructArray.data(), 0,
2521 14 : nGeomFieldCount * sizeof(bboxStructArray[0]));
2522 28 : memset(bboxStructSchema.data(), 0,
2523 14 : nGeomFieldCount * sizeof(bboxStructSchema[0]));
2524 28 : memset(bboxArrays.data(), 0,
2525 14 : nGeomFieldCount * sizeof(bboxArrays[0]));
2526 14 : aadfMinX.resize(nGeomFieldCount);
2527 14 : aadfMinY.resize(nGeomFieldCount);
2528 14 : aadfMaxX.resize(nGeomFieldCount);
2529 14 : aadfMaxY.resize(nGeomFieldCount);
2530 28 : for (int i = 0; i < nGeomFieldCount; ++i)
2531 : {
2532 14 : const bool bIsNullable = CPL_TO_BOOL(
2533 14 : m_poFeatureDefn->GetGeomFieldDefn(i)->IsNullable());
2534 14 : aadfMinX[i].reserve(static_cast<size_t>(array->length));
2535 14 : aadfMinY[i].reserve(static_cast<size_t>(array->length));
2536 14 : aadfMaxX[i].reserve(static_cast<size_t>(array->length));
2537 14 : aadfMaxY[i].reserve(static_cast<size_t>(array->length));
2538 14 : aabyBboxStructValidity[i].resize(
2539 14 : static_cast<size_t>(array->length + 7) / 8, 0xFF);
2540 :
2541 14 : bboxStructSchema[i].release = DummyFreeSchema;
2542 14 : bboxStructSchema[i].name = m_apoFieldsBBOX[i]->name().c_str();
2543 14 : bboxStructSchema[i].format = "+s"; // structure
2544 14 : bboxStructSchema[i].flags =
2545 14 : bIsNullable ? ARROW_FLAG_NULLABLE : 0;
2546 14 : bboxStructSchema[i].n_children = BBOX_SUBFIELD_COUNT;
2547 14 : bboxStructSchema[i].children = bboxStructSchemaChildren.data();
2548 :
2549 14 : constexpr int VALIDITY_ARRAY_IDX = 0;
2550 14 : constexpr int BBOX_SUBFIELD_FLOAT_VALUE_IDX = 1;
2551 14 : bboxBuffersPtr[i][XMIN_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2552 14 : aadfMinX[i].data();
2553 14 : bboxBuffersPtr[i][YMIN_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2554 14 : aadfMinY[i].data();
2555 14 : bboxBuffersPtr[i][XMAX_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2556 14 : aadfMaxX[i].data();
2557 14 : bboxBuffersPtr[i][YMAX_IDX][BBOX_SUBFIELD_FLOAT_VALUE_IDX] =
2558 14 : aadfMaxY[i].data();
2559 :
2560 70 : for (int j = 0; j < BBOX_SUBFIELD_COUNT; ++j)
2561 : {
2562 56 : bboxBuffersPtr[i][j][VALIDITY_ARRAY_IDX] = nullptr;
2563 :
2564 56 : bboxArrays[i][j].release = DummyFreeArray;
2565 56 : bboxArrays[i][j].length = array->length;
2566 56 : bboxArrays[i][j].n_buffers = BBOX_SUBFIELD_BUFFER_COUNT;
2567 56 : bboxArrays[i][j].buffers = bboxBuffersPtr[i][j].data();
2568 :
2569 56 : bboxArraysPtr[i][j] = &bboxArrays[i][j];
2570 : }
2571 :
2572 14 : bboxStructArray[i].release = DummyFreeArray;
2573 14 : bboxStructArray[i].n_children = BBOX_SUBFIELD_COUNT;
2574 : // coverity[escape]
2575 14 : bboxStructArray[i].children = bboxArraysPtr[i].data();
2576 14 : bboxStructArray[i].length = array->length;
2577 14 : bboxStructArray[i].n_buffers = BBOX_STRUCT_BUFFER_COUNT;
2578 14 : bboxStructBuffersPtr[i][VALIDITY_ARRAY_IDX] =
2579 14 : bIsNullable ? aabyBboxStructValidity[i].data() : nullptr;
2580 : // coverity[escape]
2581 14 : bboxStructArray[i].buffers = bboxStructBuffersPtr[i].data();
2582 :
2583 14 : newSchemaChildren.emplace_back(&bboxStructSchema[i]);
2584 14 : newArrayChildren.emplace_back(&bboxStructArray[i]);
2585 : }
2586 : }
2587 0 : catch (const std::bad_alloc &)
2588 : {
2589 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
2590 : "Out of memory in "
2591 : "OGRArrowWriterLayer::WriteArrowBatchInternal()");
2592 0 : return false;
2593 : }
2594 : }
2595 :
2596 1644 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
2597 : {
2598 1518 : if (!cpl::contains(oSetReferencedFieldsInArraySchema, i))
2599 : {
2600 4 : if (m_osFIDColumn.empty() &&
2601 2 : strcmp(schema->children[i]->name, pszFIDName) == 0)
2602 : {
2603 : // If the input data contains a FID column, but the output data
2604 : // does not, then ignore it.
2605 : }
2606 : else
2607 : {
2608 0 : CPLError(CE_Failure, CPLE_AppDefined,
2609 : "Found field '%s' in array schema that does not exist "
2610 : "in layer schema",
2611 0 : schema->children[i]->name);
2612 0 : return false;
2613 : }
2614 : }
2615 : }
2616 :
2617 : // ImportSchema() would release the schema, but we don't want that
2618 : // So copy the structure content into a local variable, and override its
2619 : // release callback to a no-op. This may be a bit fragile, but it doesn't
2620 : // look like ImportSchema implementation tries to access the C ArrowSchema
2621 : // after it has been called.
2622 126 : struct ArrowSchema lSchema = *schema;
2623 126 : schema = &lSchema;
2624 126 : CPL_IGNORE_RET_VAL(schema);
2625 :
2626 126 : lSchema.n_children = newSchemaChildren.size();
2627 126 : lSchema.children = newSchemaChildren.data();
2628 :
2629 126 : lSchema.release = DummyFreeSchema;
2630 252 : auto poSchemaResult = arrow::ImportSchema(&lSchema);
2631 126 : CPLAssert(lSchema.release == nullptr);
2632 126 : if (!poSchemaResult.ok())
2633 : {
2634 0 : CPLError(CE_Failure, CPLE_AppDefined, "ImportSchema() failed with %s",
2635 0 : poSchemaResult.status().message().c_str());
2636 0 : return false;
2637 : }
2638 252 : auto poSchema = *poSchemaResult;
2639 :
2640 : // Hack the array to use the new children we've computed above
2641 : // but make sure the original release() callback sees the original children
// Helper that takes over the release callback of an ArrowArray while keeping
// a bitwise copy of the structure as it was at construction time, so that the
// original release callback is eventually invoked on the original content.
// An instance deletes itself from release(), hence it must be created with
// new.
2642 : struct ArrayReleaser
2643 : {
// Bitwise snapshot of the ArrowArray taken by the constructor (zero
// initialized until then).
2644 : struct ArrowArray ori_array
2645 : {
2646 : };
2647 :
// Saves *array into ori_array, then redirects array->release and
// array->private_data to this object.
2648 126 : explicit ArrayReleaser(struct ArrowArray *array)
2649 126 : {
2650 126 : memcpy(&ori_array, array, sizeof(*array));
2651 126 : array->release = ArrayReleaser::release;
2652 126 : array->private_data = this;
2653 126 : }
2654 :
// Release callback installed by the constructor: puts back the saved
// content of the array, calls the original release callback on it, and
// destroys the ArrayReleaser.
2655 126 : static void release(struct ArrowArray *array)
2656 : {
// private_data is expected to hold the ArrayReleaser instance.
2657 126 : struct ArrayReleaser *releaser =
2658 : static_cast<struct ArrayReleaser *>(array->private_data);
// Restore every member of the array (including the original release
// callback and private_data) before delegating to the original callback.
2659 126 : memcpy(array, &(releaser->ori_array), sizeof(*array));
2660 126 : CPLAssert(array->release != nullptr);
2661 126 : array->release(array);
// The original callback is expected to have reset release to nullptr.
2662 126 : CPLAssert(array->release == nullptr);
2663 126 : delete releaser;
2664 126 : }
2665 : };
2666 :
2667 : // Must be allocated on the heap, since ArrayReleaser::release() will be
2668 : // called after this method has ended.
2669 126 : ArrayReleaser *releaser = new ArrayReleaser(array);
2670 126 : array->private_data = releaser;
2671 126 : array->n_children = newArrayChildren.size();
2672 : // cppcheck-suppress autoVariables
2673 126 : array->children = newArrayChildren.data();
2674 :
2675 : // Process geometry columns:
2676 : // - if the output encoding is WKB, then just note the geometry type and
2677 : // envelope.
2678 : // - otherwise convert to the output encoding.
2679 126 : int nBuilderIdx = 0;
2680 126 : if (!m_osFIDColumn.empty())
2681 : {
2682 2 : nBuilderIdx++;
2683 : }
2684 : std::map<std::string, std::shared_ptr<arrow::Array>>
2685 252 : oMapGeomFieldNameToArray;
2686 252 : for (int i = 0; i < nGeomFieldCount; ++i, ++nBuilderIdx)
2687 : {
2688 : const char *pszThisGeomFieldName =
2689 126 : m_poFeatureDefn->GetGeomFieldDefn(i)->GetNameRef();
2690 126 : int nIdx = poSchema->GetFieldIndex(pszThisGeomFieldName);
2691 126 : if (nIdx < 0)
2692 : {
2693 3 : if (nGeomFieldCount == 1)
2694 3 : nIdx = poSchema->GetFieldIndex(pszSingleGeomFieldName);
2695 3 : if (nIdx < 0)
2696 : {
2697 0 : CPLError(CE_Failure, CPLE_AppDefined,
2698 : "Cannot find geometry field '%s' in schema",
2699 : pszThisGeomFieldName);
2700 0 : return false;
2701 : }
2702 : }
2703 :
2704 126 : if (strcmp(lSchema.children[nIdx]->format, "z") != 0 &&
2705 1 : strcmp(lSchema.children[nIdx]->format, "Z") != 0)
2706 : {
2707 0 : CPLError(CE_Failure, CPLE_AppDefined,
2708 : "Type of geometry field '%s' is not binary, but '%s'",
2709 0 : pszThisGeomFieldName, lSchema.children[nIdx]->format);
2710 0 : return false;
2711 : }
2712 :
2713 126 : const auto psGeomArray = array->children[nIdx];
2714 126 : const uint8_t *pabyValidity =
2715 126 : psGeomArray->null_count != 0
2716 126 : ? static_cast<const uint8_t *>(psGeomArray->buffers[0])
2717 : : nullptr;
2718 126 : const bool bUseOffsets32 =
2719 126 : (strcmp(lSchema.children[nIdx]->format, "z") == 0);
2720 126 : const uint32_t *panOffsets32 =
2721 126 : static_cast<const uint32_t *>(psGeomArray->buffers[1]) +
2722 126 : psGeomArray->offset;
2723 126 : const uint64_t *panOffsets64 =
2724 126 : static_cast<const uint64_t *>(psGeomArray->buffers[1]) +
2725 126 : psGeomArray->offset;
2726 126 : GByte *pabyData =
2727 126 : static_cast<GByte *>(const_cast<void *>(psGeomArray->buffers[2]));
2728 126 : OGREnvelope sEnvelope;
2729 126 : auto poBuilder = m_apoBuilders[nBuilderIdx].get();
2730 :
2731 681 : for (size_t iRow = 0; iRow < static_cast<size_t>(psGeomArray->length);
2732 : ++iRow)
2733 : {
2734 555 : bool bValidGeom = false;
2735 :
2736 1059 : if (!pabyValidity ||
2737 504 : TestBit(pabyValidity,
2738 504 : static_cast<size_t>(iRow + psGeomArray->offset)))
2739 : {
2740 439 : const auto nLen =
2741 439 : bUseOffsets32 ? static_cast<size_t>(panOffsets32[iRow + 1] -
2742 429 : panOffsets32[iRow])
2743 10 : : static_cast<size_t>(panOffsets64[iRow + 1] -
2744 10 : panOffsets64[iRow]);
2745 439 : GByte *pabyWkb =
2746 439 : pabyData + (bUseOffsets32
2747 429 : ? panOffsets32[iRow]
2748 10 : : static_cast<size_t>(panOffsets64[iRow]));
2749 439 : if (m_aeGeomEncoding[i] == OGRArrowGeomEncoding::WKB)
2750 : {
2751 171 : FixupWKBGeometryBeforeWriting(pabyWkb, nLen);
2752 :
2753 171 : uint32_t nType = 0;
2754 171 : bool bNeedSwap = false;
2755 171 : if (OGRWKBGetGeomType(pabyWkb, nLen, bNeedSwap, nType))
2756 : {
2757 171 : m_oSetWrittenGeometryTypes[i].insert(
2758 171 : static_cast<OGRwkbGeometryType>(nType));
2759 171 : if (OGRWKBGetBoundingBox(pabyWkb, nLen, sEnvelope))
2760 : {
2761 171 : bValidGeom = true;
2762 171 : m_aoEnvelopes[i].Merge(sEnvelope);
2763 :
2764 171 : if (m_bWriteBBoxStruct)
2765 : {
2766 43 : aadfMinX[i].push_back(
2767 43 : castToFloatDown(sEnvelope.MinX));
2768 43 : aadfMinY[i].push_back(
2769 43 : castToFloatDown(sEnvelope.MinY));
2770 43 : aadfMaxX[i].push_back(
2771 43 : castToFloatUp(sEnvelope.MaxX));
2772 43 : aadfMaxY[i].push_back(
2773 43 : castToFloatUp(sEnvelope.MaxY));
2774 : }
2775 : }
2776 : }
2777 : }
2778 : else
2779 : {
2780 268 : size_t nBytesConsumedOut = 0;
2781 268 : OGRGeometry *poGeometry = nullptr;
2782 268 : OGRGeometryFactory::createFromWkb(
2783 : pabyWkb, nullptr, &poGeometry, nLen, wkbVariantIso,
2784 : nBytesConsumedOut);
2785 268 : if (BuildGeometry(poGeometry, i, poBuilder) != OGRERR_NONE)
2786 : {
2787 0 : delete poGeometry;
2788 0 : return false;
2789 : }
2790 268 : bValidGeom = true;
2791 268 : if (m_bWriteBBoxStruct)
2792 : {
2793 0 : poGeometry->getEnvelope(&sEnvelope);
2794 0 : aadfMinX[i].push_back(castToFloatDown(sEnvelope.MinX));
2795 0 : aadfMinY[i].push_back(castToFloatDown(sEnvelope.MinY));
2796 0 : aadfMaxX[i].push_back(castToFloatUp(sEnvelope.MaxX));
2797 0 : aadfMaxY[i].push_back(castToFloatUp(sEnvelope.MaxY));
2798 : }
2799 268 : delete poGeometry;
2800 : }
2801 : }
2802 : else
2803 : {
2804 116 : if (m_aeGeomEncoding[i] != OGRArrowGeomEncoding::WKB)
2805 : {
2806 81 : if (BuildGeometry(nullptr, i, poBuilder) != OGRERR_NONE)
2807 0 : return false;
2808 : }
2809 : }
2810 :
2811 555 : if (!bValidGeom && m_bWriteBBoxStruct)
2812 : {
2813 6 : if ((bboxStructSchema[i].flags & ARROW_FLAG_NULLABLE))
2814 : {
2815 6 : bboxStructArray[i].null_count++;
2816 6 : aabyBboxStructValidity[i][iRow / 8] &=
2817 6 : ~(1 << static_cast<int>(iRow % 8));
2818 : }
2819 6 : aadfMinX[i].push_back(0.0f);
2820 6 : aadfMinY[i].push_back(0.0f);
2821 6 : aadfMaxX[i].push_back(0.0f);
2822 6 : aadfMaxY[i].push_back(0.0f);
2823 : }
2824 : }
2825 :
2826 126 : if (m_aeGeomEncoding[i] != OGRArrowGeomEncoding::WKB)
2827 : {
2828 0 : std::shared_ptr<arrow::Array> geomArray;
2829 81 : auto status = poBuilder->Finish(&geomArray);
2830 81 : if (!status.ok())
2831 : {
2832 0 : CPLError(CE_Failure, CPLE_AppDefined,
2833 : "builder::Finish() for field %s failed with %s",
2834 0 : pszThisGeomFieldName, status.message().c_str());
2835 0 : return false;
2836 : }
2837 162 : oMapGeomFieldNameToArray[pszThisGeomFieldName] =
2838 162 : std::move(geomArray);
2839 : }
2840 : }
2841 :
2842 : auto poRecordBatchResult =
2843 252 : arrow::ImportRecordBatch(array, std::move(poSchema));
2844 126 : if (!poRecordBatchResult.ok())
2845 : {
2846 0 : CPLError(CE_Failure, CPLE_AppDefined,
2847 : "ImportRecordBatch() failed with %s",
2848 0 : poRecordBatchResult.status().message().c_str());
2849 0 : return false;
2850 : }
2851 252 : auto poRecordBatch = *poRecordBatchResult;
2852 :
2853 : // below assertion commented out since it is not strictly necessary, but
2854 : // reflects what ImportRecordBatch() does.
2855 : // CPLAssert(array->release == nullptr);
2856 :
2857 : // We may need to reconstruct a final record batch that perfectly matches
2858 : // the expected schema.
2859 126 : if (bRebuildBatch || !oMapGeomFieldNameToArray.empty())
2860 : {
2861 84 : std::vector<std::shared_ptr<arrow::Array>> apoArrays;
2862 662 : for (int i = 0; i < m_poSchema->num_fields(); ++i)
2863 : {
2864 : const auto oIter =
2865 578 : oMapGeomFieldNameToArray.find(m_poSchema->field(i)->name());
2866 578 : if (oIter != oMapGeomFieldNameToArray.end())
2867 81 : apoArrays.emplace_back(oIter->second);
2868 : else
2869 497 : apoArrays.emplace_back(poRecordBatch->column(i));
2870 578 : if (apoArrays.back()->type()->id() !=
2871 578 : m_poSchema->field(i)->type()->id())
2872 : {
2873 0 : CPLError(CE_Failure, CPLE_AppDefined,
2874 : "Field '%s' of unexpected type",
2875 0 : m_poSchema->field(i)->name().c_str());
2876 0 : return false;
2877 : }
2878 : }
2879 336 : poRecordBatchResult = arrow::RecordBatch::Make(
2880 252 : m_poSchema, poRecordBatch->num_rows(), std::move(apoArrays));
2881 84 : if (!poRecordBatchResult.ok())
2882 : {
2883 0 : CPLError(CE_Failure, CPLE_AppDefined,
2884 : "RecordBatch::Make() failed with %s",
2885 0 : poRecordBatchResult.status().message().c_str());
2886 0 : return false;
2887 : }
2888 84 : poRecordBatch = *poRecordBatchResult;
2889 : }
2890 :
2891 126 : if (writeBatch(poRecordBatch))
2892 : {
2893 126 : m_nFeatureCount += poRecordBatch->num_rows();
2894 126 : return true;
2895 : }
2896 0 : return false;
2897 : }
2898 :
2899 : #endif /* OGARROWWRITERLAYER_HPP_INCLUDED */
|