Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: OpenGIS Simple Features Reference Implementation
4 : * Purpose: Parts of OGRLayer dealing with Arrow C interface
5 : * Author: Even Rouault, <even dot rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022-2023, Even Rouault <even dot rouault at spatialys.com>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #include "ogrsf_frmts.h"
14 : #include "ogr_api.h"
15 : #include "ogr_recordbatch.h"
16 : #include "ograrrowarrayhelper.h"
17 : #include "ogrlayerarrow.h"
18 : #include "ogr_p.h"
19 : #include "ogr_swq.h"
20 : #include "ogr_wkb.h"
21 : #include "ogr_p.h"
22 : #include "ogrlayer_private.h"
23 :
24 : #include "cpl_float.h"
25 : #include "cpl_json.h"
26 : #include "cpl_time.h"
27 :
28 : #include <algorithm>
29 : #include <cassert>
30 : #include <cinttypes>
31 : #include <limits>
32 : #include <utility>
33 : #include <set>
34 :
// Keys of the GDAL-specific entries written into ArrowSchema::metadata to
// carry OGR field properties.
constexpr const char *MD_GDAL_OGR_TYPE{"GDAL:OGR:type"};
constexpr const char *MD_GDAL_OGR_ALTERNATIVE_NAME{"GDAL:OGR:alternative_name"};
constexpr const char *MD_GDAL_OGR_COMMENT{"GDAL:OGR:comment"};
constexpr const char *MD_GDAL_OGR_DEFAULT{"GDAL:OGR:default"};
constexpr const char *MD_GDAL_OGR_SUBTYPE{"GDAL:OGR:subtype"};
constexpr const char *MD_GDAL_OGR_WIDTH{"GDAL:OGR:width"};
constexpr const char *MD_GDAL_OGR_UNIQUE{"GDAL:OGR:unique"};
constexpr const char *MD_GDAL_OGR_DOMAIN_NAME{"GDAL:OGR:domain_name"};

// First character of Arrow format strings, as tested by the IsXXXX() helpers.
constexpr char ARROW_LETTER_BOOLEAN{'b'};
constexpr char ARROW_LETTER_INT8{'c'};
constexpr char ARROW_LETTER_UINT8{'C'};
constexpr char ARROW_LETTER_INT16{'s'};
constexpr char ARROW_LETTER_UINT16{'S'};
constexpr char ARROW_LETTER_INT32{'i'};
constexpr char ARROW_LETTER_UINT32{'I'};
constexpr char ARROW_LETTER_INT64{'l'};
constexpr char ARROW_LETTER_UINT64{'L'};
constexpr char ARROW_LETTER_FLOAT16{'e'};
constexpr char ARROW_LETTER_FLOAT32{'f'};
constexpr char ARROW_LETTER_FLOAT64{'g'};
constexpr char ARROW_LETTER_STRING{'u'};
constexpr char ARROW_LETTER_LARGE_STRING{'U'};
constexpr char ARROW_LETTER_BINARY{'z'};
constexpr char ARROW_LETTER_LARGE_BINARY{'Z'};
constexpr char ARROW_LETTER_DECIMAL{'d'};

// Second character of Arrow format strings starting with '+'.
constexpr char ARROW_2ND_LETTER_LIST{'l'};
constexpr char ARROW_2ND_LETTER_LARGE_LIST{'L'};
64 :
/** Return whether format is exactly "+s" (structure). */
static inline bool IsStructure(const char *format)
{
    if (format[0] != '+')
        return false;
    return format[1] == 's' && format[2] == '\0';
}
69 :
/** Return whether format is exactly "+m" (map). */
static inline bool IsMap(const char *format)
{
    if (format[0] != '+')
        return false;
    return format[1] == 'm' && format[2] == '\0';
}
74 :
/** Return whether format starts with "w:" (fixed width binary). */
static inline bool IsFixedWidthBinary(const char *format)
{
    return format[0] == 'w' ? format[1] == ':' : false;
}
79 :
/** Return the integer that follows the "w:" prefix of format.
 *
 * The caller is expected to have checked the prefix, e.g. with
 * IsFixedWidthBinary(): the first two characters are skipped unconditionally.
 */
static inline int GetFixedWithBinary(const char *format)
{
    const char *pszWidth = format + strlen("w:");
    return atoi(pszWidth);
}
84 :
85 30557 : static inline bool IsList(const char *format)
86 : {
87 36678 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LIST &&
88 36678 : format[2] == 0;
89 : }
90 :
91 20324 : static inline bool IsLargeList(const char *format)
92 : {
93 20448 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LARGE_LIST &&
94 20448 : format[2] == 0;
95 : }
96 :
/** Return whether format starts with "+w:" (fixed size list). */
static inline bool IsFixedSizeList(const char *format)
{
    if (format[0] != '+')
        return false;
    return format[1] == 'w' && format[2] == ':';
}
101 :
/** Return the integer that follows the "+w:" prefix of format.
 *
 * The caller is expected to have checked the prefix, e.g. with
 * IsFixedSizeList(): the first three characters are skipped unconditionally.
 */
static inline int GetFixedSizeList(const char *format)
{
    const char *pszSize = format + strlen("+w:");
    return atoi(pszSize);
}
106 :
107 2800 : static inline bool IsDecimal(const char *format)
108 : {
109 2800 : return format[0] == ARROW_LETTER_DECIMAL && format[1] == ':';
110 : }
111 :
112 1342140 : static inline bool IsBoolean(const char *format)
113 : {
114 1342140 : return format[0] == ARROW_LETTER_BOOLEAN && format[1] == 0;
115 : }
116 :
117 1338900 : static inline bool IsInt8(const char *format)
118 : {
119 1338900 : return format[0] == ARROW_LETTER_INT8 && format[1] == 0;
120 : }
121 :
122 1339020 : static inline bool IsUInt8(const char *format)
123 : {
124 1339020 : return format[0] == ARROW_LETTER_UINT8 && format[1] == 0;
125 : }
126 :
127 1337600 : static inline bool IsInt16(const char *format)
128 : {
129 1337600 : return format[0] == ARROW_LETTER_INT16 && format[1] == 0;
130 : }
131 :
132 1337700 : static inline bool IsUInt16(const char *format)
133 : {
134 1337700 : return format[0] == ARROW_LETTER_UINT16 && format[1] == 0;
135 : }
136 :
137 1396970 : static inline bool IsInt32(const char *format)
138 : {
139 1396970 : return format[0] == ARROW_LETTER_INT32 && format[1] == 0;
140 : }
141 :
142 1336060 : static inline bool IsUInt32(const char *format)
143 : {
144 1336060 : return format[0] == ARROW_LETTER_UINT32 && format[1] == 0;
145 : }
146 :
147 1389670 : static inline bool IsInt64(const char *format)
148 : {
149 1389670 : return format[0] == ARROW_LETTER_INT64 && format[1] == 0;
150 : }
151 :
152 1268540 : static inline bool IsUInt64(const char *format)
153 : {
154 1268540 : return format[0] == ARROW_LETTER_UINT64 && format[1] == 0;
155 : }
156 :
157 15140 : static inline bool IsFloat16(const char *format)
158 : {
159 15140 : return format[0] == ARROW_LETTER_FLOAT16 && format[1] == 0;
160 : }
161 :
162 1274950 : static inline bool IsFloat32(const char *format)
163 : {
164 1274950 : return format[0] == ARROW_LETTER_FLOAT32 && format[1] == 0;
165 : }
166 :
167 1266610 : static inline bool IsFloat64(const char *format)
168 : {
169 1266610 : return format[0] == ARROW_LETTER_FLOAT64 && format[1] == 0;
170 : }
171 :
172 2485410 : static inline bool IsString(const char *format)
173 : {
174 2485410 : return format[0] == ARROW_LETTER_STRING && format[1] == 0;
175 : }
176 :
177 74115 : static inline bool IsLargeString(const char *format)
178 : {
179 74115 : return format[0] == ARROW_LETTER_LARGE_STRING && format[1] == 0;
180 : }
181 :
182 79416 : static inline bool IsBinary(const char *format)
183 : {
184 79416 : return format[0] == ARROW_LETTER_BINARY && format[1] == 0;
185 : }
186 :
187 13002 : static inline bool IsLargeBinary(const char *format)
188 : {
189 13002 : return format[0] == ARROW_LETTER_LARGE_BINARY && format[1] == 0;
190 : }
191 :
/** Return whether format starts with "ts", then chType, then ':'.
 *
 * @param format Arrow format string.
 * @param chType Expected third character of the format string.
 */
static inline bool IsTimestampInternal(const char *format, char chType)
{
    if (format[0] != 't' || format[1] != 's')
        return false;
    return format[2] == chType && format[3] == ':';
}
197 :
198 4399 : static inline bool IsTimestampSeconds(const char *format)
199 : {
200 4399 : return IsTimestampInternal(format, 's');
201 : }
202 :
203 4389 : static inline bool IsTimestampMilliseconds(const char *format)
204 : {
205 4389 : return IsTimestampInternal(format, 'm');
206 : }
207 :
208 3244 : static inline bool IsTimestampMicroseconds(const char *format)
209 : {
210 3244 : return IsTimestampInternal(format, 'u');
211 : }
212 :
213 2692 : static inline bool IsTimestampNanoseconds(const char *format)
214 : {
215 2692 : return IsTimestampInternal(format, 'n');
216 : }
217 :
218 3641 : static inline bool IsTimestamp(const char *format)
219 : {
220 9831 : return IsTimestampSeconds(format) || IsTimestampMilliseconds(format) ||
221 9831 : IsTimestampMicroseconds(format) || IsTimestampNanoseconds(format);
222 : }
223 :
224 107 : static inline const char *GetTimestampTimezone(const char *format)
225 : {
226 107 : return IsTimestamp(format) ? format + strlen("tm?:") : "";
227 : }
228 :
229 : /************************************************************************/
230 : /* TestBit() */
231 : /************************************************************************/
232 :
/** Return whether bit nIdx is set in the bitmap pabyData.
 *
 * Bit 0 is the least significant bit of pabyData[0].
 */
inline bool TestBit(const uint8_t *pabyData, size_t nIdx)
{
    const uint8_t byContaining = pabyData[nIdx >> 3];
    return ((byContaining >> (nIdx & 7)) & 1) != 0;
}
237 :
238 : /************************************************************************/
239 : /* SetBit() */
240 : /************************************************************************/
241 :
/** Set bit nIdx to 1 in the bitmap pabyData.
 *
 * Bit 0 is the least significant bit of pabyData[0].
 */
inline void SetBit(uint8_t *pabyData, size_t nIdx)
{
    uint8_t &byTarget = pabyData[nIdx >> 3];
    byTarget = static_cast<uint8_t>(byTarget | (1U << (nIdx & 7)));
}
246 :
247 : /************************************************************************/
248 : /* UnsetBit() */
249 : /************************************************************************/
250 :
/** Set bit nIdx to 0 in the bitmap pabyData.
 *
 * Bit 0 is the least significant bit of pabyData[0].
 */
inline void UnsetBit(uint8_t *pabyData, size_t nIdx)
{
    const uint8_t byMask = static_cast<uint8_t>(1U << (nIdx & 7));
    uint8_t &byTarget = pabyData[nIdx >> 3];
    byTarget = static_cast<uint8_t>(byTarget & ~byMask);
}
255 :
256 : /************************************************************************/
257 : /* OGRLayerReleaseSchema() */
258 : /************************************************************************/
259 :
260 25427 : static void OGRLayerReleaseSchema(struct ArrowSchema *schema,
261 : bool bFullFreeFormat)
262 : {
263 25427 : CPLAssert(schema->release != nullptr);
264 25427 : if (bFullFreeFormat || STARTS_WITH(schema->format, "w:") ||
265 25395 : STARTS_WITH(schema->format, "tsm:"))
266 : {
267 1039 : CPLFree(const_cast<char *>(schema->format));
268 : }
269 25427 : CPLFree(const_cast<char *>(schema->name));
270 25427 : CPLFree(const_cast<char *>(schema->metadata));
271 25427 : if (schema->children)
272 : {
273 26081 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
274 : {
275 22827 : if (schema->children[i] && schema->children[i]->release)
276 : {
277 22827 : schema->children[i]->release(schema->children[i]);
278 22827 : CPLFree(schema->children[i]);
279 : }
280 : }
281 3254 : CPLFree(schema->children);
282 : }
283 25427 : if (schema->dictionary)
284 : {
285 32 : if (schema->dictionary->release)
286 : {
287 32 : schema->dictionary->release(schema->dictionary);
288 32 : CPLFree(schema->dictionary);
289 : }
290 : }
291 25427 : schema->release = nullptr;
292 25427 : }
293 :
294 25404 : static void OGRLayerPartialReleaseSchema(struct ArrowSchema *schema)
295 : {
296 25404 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ false);
297 25404 : }
298 :
299 23 : static void OGRLayerFullReleaseSchema(struct ArrowSchema *schema)
300 : {
301 23 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ true);
302 23 : }
303 :
304 : /** Release a ArrowSchema.
305 : *
306 : * To be used by driver implementations that have a custom GetArrowStream()
307 : * implementation.
308 : *
309 : * @param schema Schema to release.
310 : * @since GDAL 3.6
311 : */
312 :
313 25372 : void OGRLayer::ReleaseSchema(struct ArrowSchema *schema)
314 : {
315 25372 : OGRLayerPartialReleaseSchema(schema);
316 25372 : }
317 :
318 : /************************************************************************/
319 : /* AddDictToSchema() */
320 : /************************************************************************/
321 :
322 32 : static void AddDictToSchema(struct ArrowSchema *psChild,
323 : const OGRCodedFieldDomain *poCodedDomain)
324 : {
325 32 : const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
326 32 : int nLastCode = -1;
327 32 : int nCountNull = 0;
328 32 : uint32_t nCountChars = 0;
329 112 : for (; psIter->pszCode; ++psIter)
330 : {
331 80 : if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
332 : {
333 0 : return;
334 : }
335 80 : int nCode = atoi(psIter->pszCode);
336 80 : if (nCode <= nLastCode || nCode - nLastCode > 100)
337 : {
338 0 : return;
339 : }
340 106 : for (int i = nLastCode + 1; i < nCode; ++i)
341 : {
342 26 : nCountNull++;
343 : }
344 80 : if (psIter->pszValue != nullptr)
345 : {
346 54 : const size_t nLen = strlen(psIter->pszValue);
347 54 : if (nLen > std::numeric_limits<uint32_t>::max() - nCountChars)
348 0 : return;
349 54 : nCountChars += static_cast<uint32_t>(nLen);
350 : }
351 : else
352 26 : nCountNull++;
353 80 : nLastCode = nCode;
354 : }
355 :
356 : auto psChildDict = static_cast<struct ArrowSchema *>(
357 32 : CPLCalloc(1, sizeof(struct ArrowSchema)));
358 32 : psChild->dictionary = psChildDict;
359 32 : psChildDict->release = OGRLayerPartialReleaseSchema;
360 32 : psChildDict->name = CPLStrdup(poCodedDomain->GetName().c_str());
361 32 : psChildDict->format = "u";
362 32 : if (nCountNull)
363 26 : psChildDict->flags = ARROW_FLAG_NULLABLE;
364 : }
365 :
366 : /************************************************************************/
367 : /* DefaultGetArrowSchema() */
368 : /************************************************************************/
369 :
370 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
371 : *
372 : * To be used by driver implementations that have a custom GetArrowStream()
373 : * implementation.
374 : *
375 : * @since GDAL 3.6
376 : */
377 2234 : int OGRLayer::GetArrowSchema(struct ArrowArrayStream *,
378 : struct ArrowSchema *out_schema)
379 : {
380 2234 : const bool bIncludeFID = CPLTestBool(
381 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
382 2234 : const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
383 : GAS_OPT_DATETIME_AS_STRING, false);
384 2234 : memset(out_schema, 0, sizeof(*out_schema));
385 2234 : out_schema->format = "+s";
386 2234 : out_schema->name = CPLStrdup("");
387 2234 : out_schema->metadata = nullptr;
388 2234 : auto poLayerDefn = GetLayerDefn();
389 2234 : const int nFieldCount = poLayerDefn->GetFieldCount();
390 2234 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
391 2234 : const int nChildren = 1 + nFieldCount + nGeomFieldCount;
392 :
393 2234 : out_schema->children = static_cast<struct ArrowSchema **>(
394 2234 : CPLCalloc(nChildren, sizeof(struct ArrowSchema *)));
395 2234 : int iSchemaChild = 0;
396 2234 : if (bIncludeFID)
397 : {
398 3940 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
399 1970 : CPLCalloc(1, sizeof(struct ArrowSchema)));
400 1970 : auto psChild = out_schema->children[iSchemaChild];
401 1970 : ++iSchemaChild;
402 1970 : psChild->release = OGRLayer::ReleaseSchema;
403 1970 : const char *pszFIDName = GetFIDColumn();
404 1970 : psChild->name =
405 1970 : CPLStrdup((pszFIDName && pszFIDName[0]) ? pszFIDName
406 : : DEFAULT_ARROW_FID_NAME);
407 1970 : psChild->format = "l";
408 : }
409 20168 : for (int i = 0; i < nFieldCount; ++i)
410 : {
411 17934 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
412 17934 : if (poFieldDefn->IsIgnored())
413 : {
414 48 : continue;
415 : }
416 :
417 35772 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
418 17886 : CPLCalloc(1, sizeof(struct ArrowSchema)));
419 17886 : auto psChild = out_schema->children[iSchemaChild];
420 17886 : ++iSchemaChild;
421 17886 : psChild->release = OGRLayer::ReleaseSchema;
422 17886 : psChild->name = CPLStrdup(poFieldDefn->GetNameRef());
423 17886 : if (poFieldDefn->IsNullable())
424 17100 : psChild->flags = ARROW_FLAG_NULLABLE;
425 17886 : const auto eType = poFieldDefn->GetType();
426 17886 : const auto eSubType = poFieldDefn->GetSubType();
427 17886 : const char *item_format = nullptr;
428 :
429 35772 : std::vector<std::pair<std::string, std::string>> oMetadata;
430 :
431 17886 : switch (eType)
432 : {
433 5739 : case OFTInteger:
434 : {
435 5739 : if (eSubType == OFSTBoolean)
436 286 : psChild->format = "b";
437 5453 : else if (eSubType == OFSTInt16)
438 673 : psChild->format = "s";
439 : else
440 4780 : psChild->format = "i";
441 :
442 5739 : const auto &osDomainName = poFieldDefn->GetDomainName();
443 5739 : if (!osDomainName.empty())
444 : {
445 32 : auto poDS = GetDataset();
446 32 : if (poDS)
447 : {
448 : const auto poFieldDomain =
449 32 : poDS->GetFieldDomain(osDomainName);
450 64 : if (poFieldDomain &&
451 32 : poFieldDomain->GetDomainType() == OFDT_CODED)
452 : {
453 32 : const OGRCodedFieldDomain *poCodedDomain =
454 : static_cast<const OGRCodedFieldDomain *>(
455 : poFieldDomain);
456 32 : AddDictToSchema(psChild, poCodedDomain);
457 : }
458 : }
459 : }
460 :
461 5739 : break;
462 : }
463 :
464 565 : case OFTInteger64:
465 565 : psChild->format = "l";
466 565 : break;
467 :
468 2886 : case OFTReal:
469 : {
470 2886 : if (eSubType == OFSTFloat32)
471 676 : psChild->format = "f";
472 : else
473 2210 : psChild->format = "g";
474 2886 : break;
475 : }
476 :
477 5124 : case OFTString:
478 : case OFTWideString:
479 5124 : psChild->format = "u";
480 5124 : break;
481 :
482 1209 : case OFTBinary:
483 : {
484 1209 : if (poFieldDefn->GetWidth() > 0)
485 9 : psChild->format =
486 9 : CPLStrdup(CPLSPrintf("w:%d", poFieldDefn->GetWidth()));
487 : else
488 1200 : psChild->format = "z";
489 1209 : break;
490 : }
491 :
492 383 : case OFTIntegerList:
493 : {
494 383 : if (eSubType == OFSTBoolean)
495 92 : item_format = "b";
496 291 : else if (eSubType == OFSTInt16)
497 67 : item_format = "s";
498 : else
499 224 : item_format = "i";
500 383 : break;
501 : }
502 :
503 97 : case OFTInteger64List:
504 97 : item_format = "l";
505 97 : break;
506 :
507 256 : case OFTRealList:
508 : {
509 256 : if (eSubType == OFSTFloat32)
510 84 : item_format = "f";
511 : else
512 172 : item_format = "g";
513 256 : break;
514 : }
515 :
516 273 : case OFTStringList:
517 : case OFTWideStringList:
518 273 : item_format = "u";
519 273 : break;
520 :
521 210 : case OFTDate:
522 210 : psChild->format = "tdD";
523 210 : break;
524 :
525 118 : case OFTTime:
526 118 : psChild->format = "ttm";
527 118 : break;
528 :
529 1026 : case OFTDateTime:
530 : {
531 1026 : const char *pszPrefix = "tsm:";
532 : const char *pszTZOverride =
533 1026 : m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
534 1026 : const int nTZFlag = poFieldDefn->GetTZFlag();
535 1026 : if (bDateTimeAsString)
536 : {
537 19 : psChild->format = "u";
538 : }
539 1007 : else if (pszTZOverride && EQUAL(pszTZOverride, "unknown"))
540 : {
541 4 : psChild->format = CPLStrdup(pszPrefix);
542 : }
543 1003 : else if ((pszTZOverride && EQUAL(pszTZOverride, "mixed")) ||
544 963 : (!pszTZOverride && nTZFlag == OGR_TZFLAG_MIXED_TZ))
545 : {
546 : oMetadata.emplace_back(
547 6 : std::pair(ARROW_EXTENSION_NAME_KEY,
548 6 : EXTENSION_NAME_ARROW_TIMESTAMP_WITH_OFFSET));
549 :
550 6 : psChild->format = "+s";
551 6 : psChild->n_children = 2;
552 6 : psChild->children = static_cast<struct ArrowSchema **>(
553 6 : CPLCalloc(2, sizeof(struct ArrowSchema *)));
554 :
555 : // Create sub-child for timestamp in UTC
556 12 : psChild->children[0] = static_cast<struct ArrowSchema *>(
557 6 : CPLCalloc(1, sizeof(struct ArrowSchema)));
558 6 : psChild->children[0]->release = OGRLayer::ReleaseSchema;
559 12 : psChild->children[0]->name =
560 6 : CPLStrdup(ATSWO_TIMESTAMP_FIELD_NAME);
561 6 : psChild->children[0]->format = CPLStrdup("tsm:UTC");
562 :
563 : // Create sub-child for offset to UTC in minutes
564 12 : psChild->children[1] = static_cast<struct ArrowSchema *>(
565 6 : CPLCalloc(1, sizeof(struct ArrowSchema)));
566 6 : psChild->children[1]->release = OGRLayer::ReleaseSchema;
567 12 : psChild->children[1]->name =
568 6 : CPLStrdup(ATSWO_OFFSET_MINUTES_FIELD_NAME);
569 6 : psChild->children[1]->format = "s";
570 : }
571 997 : else if (pszTZOverride)
572 : {
573 40 : psChild->format = CPLStrdup(
574 80 : (std::string(pszPrefix) + pszTZOverride).c_str());
575 : }
576 : else
577 : {
578 957 : if (nTZFlag == OGR_TZFLAG_UTC)
579 : {
580 5 : psChild->format =
581 5 : CPLStrdup(CPLSPrintf("%sUTC", pszPrefix));
582 : }
583 952 : else if (nTZFlag == OGR_TZFLAG_UNKNOWN ||
584 : nTZFlag == OGR_TZFLAG_LOCALTIME)
585 : {
586 936 : psChild->format = CPLStrdup(pszPrefix);
587 : }
588 : else
589 : {
590 16 : psChild->format = CPLStrdup(
591 32 : (pszPrefix + OGRTZFlagToTimezone(nTZFlag, "UTC"))
592 : .c_str());
593 : }
594 : }
595 1026 : break;
596 : }
597 : }
598 :
599 17886 : if (item_format)
600 : {
601 1009 : psChild->format = "+l";
602 1009 : psChild->n_children = 1;
603 1009 : psChild->children = static_cast<struct ArrowSchema **>(
604 1009 : CPLCalloc(1, sizeof(struct ArrowSchema *)));
605 2018 : psChild->children[0] = static_cast<struct ArrowSchema *>(
606 1009 : CPLCalloc(1, sizeof(struct ArrowSchema)));
607 1009 : psChild->children[0]->release = OGRLayer::ReleaseSchema;
608 1009 : psChild->children[0]->name = CPLStrdup("item");
609 1009 : psChild->children[0]->format = item_format;
610 : }
611 :
612 17886 : if (eType == OFTDateTime && bDateTimeAsString)
613 : {
614 : oMetadata.emplace_back(
615 19 : std::pair(MD_GDAL_OGR_TYPE, OGR_GetFieldTypeName(eType)));
616 : }
617 :
618 17886 : const char *pszAlternativeName = poFieldDefn->GetAlternativeNameRef();
619 17886 : if (pszAlternativeName && pszAlternativeName[0])
620 : oMetadata.emplace_back(
621 262 : std::pair(MD_GDAL_OGR_ALTERNATIVE_NAME, pszAlternativeName));
622 :
623 17886 : const char *pszDefault = poFieldDefn->GetDefault();
624 17886 : if (pszDefault && pszDefault[0])
625 42 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DEFAULT, pszDefault));
626 :
627 17886 : const std::string &osComment = poFieldDefn->GetComment();
628 17886 : if (!osComment.empty())
629 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_COMMENT, osComment));
630 :
631 17886 : if (eType == OFTString && eSubType == OFSTJSON)
632 : {
633 130 : oMetadata.emplace_back(
634 130 : std::pair(ARROW_EXTENSION_NAME_KEY, EXTENSION_NAME_ARROW_JSON));
635 : }
636 17756 : else if (eSubType != OFSTNone && eSubType != OFSTBoolean &&
637 : eSubType != OFSTFloat32)
638 : {
639 0 : oMetadata.emplace_back(std::pair(
640 741 : MD_GDAL_OGR_SUBTYPE, OGR_GetFieldSubTypeName(eSubType)));
641 : }
642 17886 : if (eType == OFTString && poFieldDefn->GetWidth() > 0)
643 : {
644 0 : oMetadata.emplace_back(std::pair(
645 641 : MD_GDAL_OGR_WIDTH, CPLSPrintf("%d", poFieldDefn->GetWidth())));
646 : }
647 17886 : if (poFieldDefn->IsUnique())
648 : {
649 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_UNIQUE, "true"));
650 : }
651 17886 : if (!poFieldDefn->GetDomainName().empty())
652 : {
653 64 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DOMAIN_NAME,
654 64 : poFieldDefn->GetDomainName()));
655 : }
656 :
657 17886 : if (!oMetadata.empty())
658 : {
659 1863 : uint64_t nLen64 = sizeof(int32_t);
660 3756 : for (const auto &oPair : oMetadata)
661 : {
662 1893 : nLen64 += sizeof(int32_t);
663 1893 : nLen64 += oPair.first.size();
664 1893 : nLen64 += sizeof(int32_t);
665 1893 : nLen64 += oPair.second.size();
666 : }
667 1863 : if (nLen64 <
668 1863 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
669 : {
670 1863 : const size_t nLen = static_cast<size_t>(nLen64);
671 1863 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
672 1863 : psChild->metadata = pszMetadata;
673 1863 : size_t offsetMD = 0;
674 1863 : int32_t nSize = static_cast<int>(oMetadata.size());
675 1863 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
676 1863 : offsetMD += sizeof(int32_t);
677 3756 : for (const auto &oPair : oMetadata)
678 : {
679 1893 : nSize = static_cast<int32_t>(oPair.first.size());
680 1893 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
681 1893 : offsetMD += sizeof(int32_t);
682 1893 : memcpy(pszMetadata + offsetMD, oPair.first.data(),
683 : oPair.first.size());
684 1893 : offsetMD += oPair.first.size();
685 :
686 1893 : nSize = static_cast<int32_t>(oPair.second.size());
687 1893 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
688 1893 : offsetMD += sizeof(int32_t);
689 1893 : memcpy(pszMetadata + offsetMD, oPair.second.data(),
690 : oPair.second.size());
691 1893 : offsetMD += oPair.second.size();
692 : }
693 :
694 1863 : CPLAssert(offsetMD == nLen);
695 1863 : CPL_IGNORE_RET_VAL(offsetMD);
696 : }
697 : else
698 : {
699 : // Extremely unlikely !
700 0 : CPLError(CE_Warning, CPLE_AppDefined,
701 : "Cannot write ArrowSchema::metadata due to "
702 : "too large content");
703 : }
704 : }
705 : }
706 :
707 : const char *const pszGeometryMetadataEncoding =
708 2234 : m_aosArrowArrayStreamOptions.FetchNameValue(
709 : "GEOMETRY_METADATA_ENCODING");
710 2234 : const char *pszExtensionName = EXTENSION_NAME_OGC_WKB;
711 2234 : if (pszGeometryMetadataEncoding)
712 : {
713 6 : if (EQUAL(pszGeometryMetadataEncoding, "OGC"))
714 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
715 6 : else if (EQUAL(pszGeometryMetadataEncoding, "GEOARROW"))
716 6 : pszExtensionName = EXTENSION_NAME_GEOARROW_WKB;
717 : else
718 0 : CPLError(CE_Warning, CPLE_NotSupported,
719 : "Unsupported GEOMETRY_METADATA_ENCODING value: %s",
720 : pszGeometryMetadataEncoding);
721 : }
722 4181 : for (int i = 0; i < nGeomFieldCount; ++i)
723 : {
724 1947 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
725 1947 : if (poFieldDefn->IsIgnored())
726 : {
727 15 : continue;
728 : }
729 :
730 1932 : out_schema->children[iSchemaChild] = CreateSchemaForWKBGeometryColumn(
731 : poFieldDefn, "z", pszExtensionName);
732 :
733 1932 : ++iSchemaChild;
734 : }
735 :
736 2234 : out_schema->n_children = iSchemaChild;
737 2234 : out_schema->release = OGRLayer::ReleaseSchema;
738 2234 : return 0;
739 : }
740 :
741 : /************************************************************************/
742 : /* CreateSchemaForWKBGeometryColumn() */
743 : /************************************************************************/
744 :
745 : /** Return a ArrowSchema* corresponding to the WKB encoding of a geometry
746 : * column.
747 : */
748 :
749 : /* static */
750 : struct ArrowSchema *
751 2261 : OGRLayer::CreateSchemaForWKBGeometryColumn(const OGRGeomFieldDefn *poFieldDefn,
752 : const char *pszArrowFormat,
753 : const char *pszExtensionName)
754 : {
755 2261 : CPLAssert(strcmp(pszArrowFormat, "z") == 0 ||
756 : strcmp(pszArrowFormat, "Z") == 0);
757 2261 : if (!EQUAL(pszExtensionName, EXTENSION_NAME_OGC_WKB) &&
758 6 : !EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
759 : {
760 0 : CPLError(CE_Failure, CPLE_NotSupported,
761 : "Unsupported extension name '%s'. Defaulting to '%s'",
762 : pszExtensionName, EXTENSION_NAME_OGC_WKB);
763 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
764 : }
765 : auto psSchema = static_cast<struct ArrowSchema *>(
766 2261 : CPLCalloc(1, sizeof(struct ArrowSchema)));
767 2261 : psSchema->release = OGRLayer::ReleaseSchema;
768 2261 : const char *pszGeomFieldName = poFieldDefn->GetNameRef();
769 2261 : if (pszGeomFieldName[0] == '\0')
770 781 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
771 2261 : psSchema->name = CPLStrdup(pszGeomFieldName);
772 2261 : if (poFieldDefn->IsNullable())
773 2232 : psSchema->flags = ARROW_FLAG_NULLABLE;
774 2261 : psSchema->format = strcmp(pszArrowFormat, "z") == 0 ? "z" : "Z";
775 2261 : std::string osExtensionMetadata;
776 2261 : if (EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
777 : {
778 6 : const auto poSRS = poFieldDefn->GetSpatialRef();
779 6 : if (poSRS)
780 : {
781 3 : char *pszPROJJSON = nullptr;
782 3 : poSRS->exportToPROJJSON(&pszPROJJSON, nullptr);
783 3 : if (pszPROJJSON)
784 : {
785 3 : osExtensionMetadata = "{\"crs\":";
786 3 : osExtensionMetadata += pszPROJJSON;
787 3 : osExtensionMetadata += '}';
788 3 : CPLFree(pszPROJJSON);
789 : }
790 : else
791 : {
792 0 : CPLError(CE_Warning, CPLE_AppDefined,
793 : "Cannot export CRS of geometry field %s to PROJJSON",
794 : poFieldDefn->GetNameRef());
795 : }
796 : }
797 : }
798 2261 : size_t nLen = sizeof(int32_t) + sizeof(int32_t) +
799 : strlen(ARROW_EXTENSION_NAME_KEY) + sizeof(int32_t) +
800 2261 : strlen(pszExtensionName);
801 2261 : if (!osExtensionMetadata.empty())
802 : {
803 3 : nLen += sizeof(int32_t) + strlen(ARROW_EXTENSION_METADATA_KEY) +
804 3 : sizeof(int32_t) + osExtensionMetadata.size();
805 : }
806 2261 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
807 2261 : psSchema->metadata = pszMetadata;
808 2261 : size_t offsetMD = 0;
809 2261 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
810 2261 : osExtensionMetadata.empty() ? 1 : 2;
811 2261 : offsetMD += sizeof(int32_t);
812 2261 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
813 : static_cast<int32_t>(strlen(ARROW_EXTENSION_NAME_KEY));
814 2261 : offsetMD += sizeof(int32_t);
815 2261 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_NAME_KEY,
816 : strlen(ARROW_EXTENSION_NAME_KEY));
817 2261 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_NAME_KEY));
818 2261 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
819 2261 : static_cast<int32_t>(strlen(pszExtensionName));
820 2261 : offsetMD += sizeof(int32_t);
821 2261 : memcpy(pszMetadata + offsetMD, pszExtensionName, strlen(pszExtensionName));
822 2261 : offsetMD += strlen(pszExtensionName);
823 2261 : if (!osExtensionMetadata.empty())
824 : {
825 3 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
826 : static_cast<int32_t>(strlen(ARROW_EXTENSION_METADATA_KEY));
827 3 : offsetMD += sizeof(int32_t);
828 3 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_METADATA_KEY,
829 : strlen(ARROW_EXTENSION_METADATA_KEY));
830 3 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_METADATA_KEY));
831 3 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
832 3 : static_cast<int32_t>(osExtensionMetadata.size());
833 3 : offsetMD += sizeof(int32_t);
834 3 : memcpy(pszMetadata + offsetMD, osExtensionMetadata.c_str(),
835 : osExtensionMetadata.size());
836 3 : offsetMD += osExtensionMetadata.size();
837 : }
838 2261 : CPLAssert(offsetMD == nLen);
839 2261 : CPL_IGNORE_RET_VAL(offsetMD);
840 4522 : return psSchema;
841 : }
842 :
843 : /************************************************************************/
844 : /* StaticGetArrowSchema() */
845 : /************************************************************************/
846 :
847 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
848 : *
849 : * To be used by driver implementations that have a custom GetArrowStream()
850 : * implementation.
851 : *
852 : * @since GDAL 3.6
853 : */
854 2455 : int OGRLayer::StaticGetArrowSchema(struct ArrowArrayStream *stream,
855 : struct ArrowSchema *out_schema)
856 : {
857 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
858 2455 : stream->private_data)
859 2455 : ->poShared->m_poLayer;
860 2455 : if (poLayer == nullptr)
861 : {
862 1 : CPLError(CE_Failure, CPLE_NotSupported,
863 : "Calling get_schema() on a freed OGRLayer is not supported");
864 1 : return EINVAL;
865 : }
866 2454 : return poLayer->GetArrowSchema(stream, out_schema);
867 : }
868 :
869 : /************************************************************************/
870 : /* OGRLayerDefaultReleaseArray() */
871 : /************************************************************************/
872 :
873 35013 : static void OGRLayerDefaultReleaseArray(struct ArrowArray *array)
874 : {
875 35013 : if (array->buffers)
876 : {
877 110019 : for (int i = 0; i < static_cast<int>(array->n_buffers); ++i)
878 75006 : VSIFreeAligned(const_cast<void *>(array->buffers[i]));
879 35013 : CPLFree(array->buffers);
880 : }
881 35013 : if (array->children)
882 : {
883 40956 : for (int i = 0; i < static_cast<int>(array->n_children); ++i)
884 : {
885 33203 : if (array->children[i] && array->children[i]->release)
886 : {
887 32830 : array->children[i]->release(array->children[i]);
888 32830 : CPLFree(array->children[i]);
889 : }
890 : }
891 7753 : CPLFree(array->children);
892 : }
893 35013 : if (array->dictionary)
894 : {
895 148 : if (array->dictionary->release)
896 : {
897 148 : array->dictionary->release(array->dictionary);
898 148 : CPLFree(array->dictionary);
899 : }
900 : }
901 35013 : array->release = nullptr;
902 35013 : }
903 :
904 : /** Release a ArrowArray.
905 : *
906 : * To be used by driver implementations that have a custom GetArrowStream()
907 : * implementation.
908 : *
909 : * @param array Arrow array to release.
910 : * @since GDAL 3.6
911 : */
912 3987 : void OGRLayer::ReleaseArray(struct ArrowArray *array)
913 : {
914 3987 : OGRLayerDefaultReleaseArray(array);
915 3987 : }
916 :
917 : /************************************************************************/
918 : /* IsValidField() */
919 : /************************************************************************/
920 :
921 88526 : static inline bool IsValidField(const OGRField *psRawField)
922 : {
923 103666 : return (!(psRawField->Set.nMarker1 == OGRUnsetMarker &&
924 7570 : psRawField->Set.nMarker2 == OGRUnsetMarker &&
925 177052 : psRawField->Set.nMarker3 == OGRUnsetMarker) &&
926 80956 : !(psRawField->Set.nMarker1 == OGRNullMarker &&
927 3213 : psRawField->Set.nMarker2 == OGRNullMarker &&
928 91739 : psRawField->Set.nMarker3 == OGRNullMarker));
929 : }
930 :
931 : /************************************************************************/
932 : /* AllocValidityBitmap() */
933 : /************************************************************************/
934 :
935 3516 : static uint8_t *AllocValidityBitmap(size_t nSize)
936 : {
937 : auto pabyValidity = static_cast<uint8_t *>(
938 3516 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((1 + nSize + 7) / 8));
939 3516 : if (pabyValidity)
940 : {
941 : // All valid initially
942 3516 : memset(pabyValidity, 0xFF, (nSize + 7) / 8);
943 : }
944 3516 : return pabyValidity;
945 : }
946 :
947 : /************************************************************************/
948 : /* FillArray() */
949 : /************************************************************************/
950 :
951 : template <class T, typename TMember>
952 5816 : static bool FillArray(struct ArrowArray *psChild,
953 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
954 : const size_t nFeatureCountLimit, const bool bIsNullable,
955 : TMember member, const int i)
956 : {
957 5816 : psChild->n_buffers = 2;
958 5816 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
959 5816 : uint8_t *pabyValidity = nullptr;
960 : T *panValues = static_cast<T *>(
961 5816 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
962 5816 : if (panValues == nullptr)
963 0 : return false;
964 5816 : psChild->buffers[1] = panValues;
965 53734 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
966 : {
967 47918 : auto &poFeature = apoFeatures[iFeat];
968 47918 : const auto psRawField = poFeature->GetRawFieldRef(i);
969 47918 : if (IsValidField(psRawField))
970 : {
971 43000 : panValues[iFeat] = static_cast<T>((*psRawField).*member);
972 : }
973 4918 : else if (bIsNullable)
974 : {
975 4918 : panValues[iFeat] = 0;
976 4918 : ++psChild->null_count;
977 4918 : if (pabyValidity == nullptr)
978 : {
979 1235 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
980 1235 : psChild->buffers[0] = pabyValidity;
981 1235 : if (pabyValidity == nullptr)
982 0 : return false;
983 : }
984 4918 : UnsetBit(pabyValidity, iFeat);
985 : }
986 : else
987 : {
988 0 : panValues[iFeat] = 0;
989 : }
990 : }
991 5816 : return true;
992 : }
993 :
994 : /************************************************************************/
995 : /* FillBoolArray() */
996 : /************************************************************************/
997 :
998 : template <typename TMember>
999 138 : static bool FillBoolArray(struct ArrowArray *psChild,
1000 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1001 : const size_t nFeatureCountLimit,
1002 : const bool bIsNullable, TMember member, const int i)
1003 : {
1004 138 : psChild->n_buffers = 2;
1005 138 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1006 138 : uint8_t *pabyValidity = nullptr;
1007 : uint8_t *panValues = static_cast<uint8_t *>(
1008 138 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 7 + 1) / 8));
1009 138 : if (panValues == nullptr)
1010 0 : return false;
1011 138 : memset(panValues, 0, (nFeatureCountLimit + 7) / 8);
1012 138 : psChild->buffers[1] = panValues;
1013 601 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1014 : {
1015 463 : auto &poFeature = apoFeatures[iFeat];
1016 463 : const auto psRawField = poFeature->GetRawFieldRef(i);
1017 463 : if (IsValidField(psRawField))
1018 : {
1019 405 : if ((*psRawField).*member)
1020 81 : SetBit(panValues, iFeat);
1021 : }
1022 58 : else if (bIsNullable)
1023 : {
1024 58 : ++psChild->null_count;
1025 58 : if (pabyValidity == nullptr)
1026 : {
1027 46 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1028 46 : psChild->buffers[0] = pabyValidity;
1029 46 : if (pabyValidity == nullptr)
1030 0 : return false;
1031 : }
1032 58 : UnsetBit(pabyValidity, iFeat);
1033 : }
1034 : }
1035 138 : return true;
1036 : }
1037 :
1038 : /************************************************************************/
1039 : /* FillListArray() */
1040 : /************************************************************************/
1041 :
1042 : struct GetFromIntegerList
1043 : {
1044 555 : static inline int getCount(const OGRField *psRawField)
1045 : {
1046 555 : return psRawField->IntegerList.nCount;
1047 : }
1048 :
1049 276 : static inline const int *getValues(const OGRField *psRawField)
1050 : {
1051 276 : return psRawField->IntegerList.paList;
1052 : }
1053 : };
1054 :
1055 : struct GetFromInteger64List
1056 : {
1057 242 : static inline int getCount(const OGRField *psRawField)
1058 : {
1059 242 : return psRawField->Integer64List.nCount;
1060 : }
1061 :
1062 120 : static inline const GIntBig *getValues(const OGRField *psRawField)
1063 : {
1064 120 : return psRawField->Integer64List.paList;
1065 : }
1066 : };
1067 :
1068 : struct GetFromRealList
1069 : {
1070 374 : static inline int getCount(const OGRField *psRawField)
1071 : {
1072 374 : return psRawField->RealList.nCount;
1073 : }
1074 :
1075 186 : static inline const double *getValues(const OGRField *psRawField)
1076 : {
1077 186 : return psRawField->RealList.paList;
1078 : }
1079 : };
1080 :
1081 : template <class OffsetType, class T, class GetFromList>
1082 : static size_t
1083 416 : FillListArray(struct ArrowArray *psChild,
1084 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1085 : const size_t nFeatureCountLimit, const bool bIsNullable,
1086 : const int i, const size_t nMemLimit)
1087 : {
1088 416 : psChild->n_buffers = 2;
1089 416 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1090 416 : uint8_t *pabyValidity = nullptr;
1091 : OffsetType *panOffsets =
1092 416 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1093 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1094 416 : if (panOffsets == nullptr)
1095 0 : return 0;
1096 416 : psChild->buffers[1] = panOffsets;
1097 :
1098 416 : OffsetType nOffset = 0;
1099 416 : size_t nFeatCount = 0;
1100 1445 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1101 : {
1102 1035 : panOffsets[iFeat] = nOffset;
1103 1035 : auto &poFeature = apoFeatures[iFeat];
1104 1035 : const auto psRawField = poFeature->GetRawFieldRef(i);
1105 1035 : if (IsValidField(psRawField))
1106 : {
1107 529 : const unsigned nCount = GetFromList::getCount(psRawField);
1108 529 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1109 : {
1110 6 : if (nFeatCount == 0)
1111 3 : return 0;
1112 3 : break;
1113 : }
1114 523 : nOffset += static_cast<OffsetType>(nCount);
1115 : }
1116 506 : else if (bIsNullable)
1117 : {
1118 506 : ++psChild->null_count;
1119 506 : if (pabyValidity == nullptr)
1120 : {
1121 231 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1122 231 : psChild->buffers[0] = pabyValidity;
1123 231 : if (pabyValidity == nullptr)
1124 0 : return 0;
1125 : }
1126 506 : UnsetBit(pabyValidity, iFeat);
1127 : }
1128 : }
1129 413 : panOffsets[nFeatCount] = nOffset;
1130 :
1131 413 : psChild->n_children = 1;
1132 413 : psChild->children = static_cast<struct ArrowArray **>(
1133 413 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1134 826 : psChild->children[0] = static_cast<struct ArrowArray *>(
1135 413 : CPLCalloc(1, sizeof(struct ArrowArray)));
1136 413 : auto psValueChild = psChild->children[0];
1137 :
1138 413 : psValueChild->release = OGRLayerDefaultReleaseArray;
1139 413 : psValueChild->n_buffers = 2;
1140 413 : psValueChild->buffers =
1141 413 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1142 413 : psValueChild->length = nOffset;
1143 : T *panValues = static_cast<T *>(
1144 413 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (nOffset + 1)));
1145 413 : if (panValues == nullptr)
1146 0 : return 0;
1147 413 : psValueChild->buffers[1] = panValues;
1148 :
1149 413 : nOffset = 0;
1150 1442 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1151 : {
1152 1029 : auto &poFeature = apoFeatures[iFeat];
1153 1029 : const auto psRawField = poFeature->GetRawFieldRef(i);
1154 1029 : if (IsValidField(psRawField))
1155 : {
1156 523 : const int nCount = GetFromList::getCount(psRawField);
1157 523 : const auto paList = GetFromList::getValues(psRawField);
1158 : if (sizeof(*paList) == sizeof(T))
1159 456 : memcpy(panValues + nOffset, paList, nCount * sizeof(T));
1160 : else
1161 : {
1162 203 : for (int j = 0; j < nCount; ++j)
1163 : {
1164 136 : panValues[nOffset + j] = static_cast<T>(paList[j]);
1165 : }
1166 : }
1167 523 : nOffset += static_cast<OffsetType>(nCount);
1168 : }
1169 : }
1170 :
1171 413 : return nFeatCount;
1172 : }
1173 :
1174 : template <class OffsetType, class GetFromList>
1175 : static size_t
1176 49 : FillListArrayBool(struct ArrowArray *psChild,
1177 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1178 : const size_t nFeatureCountLimit, const bool bIsNullable,
1179 : const int i, const size_t nMemLimit)
1180 : {
1181 49 : psChild->n_buffers = 2;
1182 49 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1183 49 : uint8_t *pabyValidity = nullptr;
1184 : OffsetType *panOffsets =
1185 49 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1186 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1187 49 : if (panOffsets == nullptr)
1188 0 : return 0;
1189 49 : psChild->buffers[1] = panOffsets;
1190 :
1191 49 : OffsetType nOffset = 0;
1192 49 : size_t nFeatCount = 0;
1193 138 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1194 : {
1195 91 : panOffsets[iFeat] = nOffset;
1196 91 : auto &poFeature = apoFeatures[iFeat];
1197 91 : const auto psRawField = poFeature->GetRawFieldRef(i);
1198 91 : if (IsValidField(psRawField))
1199 : {
1200 60 : const unsigned nCount = GetFromList::getCount(psRawField);
1201 60 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1202 : {
1203 2 : if (nFeatCount == 0)
1204 1 : return 0;
1205 1 : break;
1206 : }
1207 58 : nOffset += static_cast<OffsetType>(nCount);
1208 : }
1209 31 : else if (bIsNullable)
1210 : {
1211 31 : ++psChild->null_count;
1212 31 : if (pabyValidity == nullptr)
1213 : {
1214 27 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1215 27 : psChild->buffers[0] = pabyValidity;
1216 27 : if (pabyValidity == nullptr)
1217 0 : return 0;
1218 : }
1219 31 : UnsetBit(pabyValidity, iFeat);
1220 : }
1221 : }
1222 48 : panOffsets[nFeatCount] = nOffset;
1223 :
1224 48 : psChild->n_children = 1;
1225 48 : psChild->children = static_cast<struct ArrowArray **>(
1226 48 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1227 96 : psChild->children[0] = static_cast<struct ArrowArray *>(
1228 48 : CPLCalloc(1, sizeof(struct ArrowArray)));
1229 48 : auto psValueChild = psChild->children[0];
1230 :
1231 48 : psValueChild->release = OGRLayerDefaultReleaseArray;
1232 48 : psValueChild->n_buffers = 2;
1233 48 : psValueChild->buffers =
1234 48 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1235 48 : psValueChild->length = nOffset;
1236 : uint8_t *panValues = static_cast<uint8_t *>(
1237 48 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nOffset + 7 + 1) / 8));
1238 48 : if (panValues == nullptr)
1239 0 : return 0;
1240 48 : memset(panValues, 0, (nOffset + 7) / 8);
1241 48 : psValueChild->buffers[1] = panValues;
1242 :
1243 48 : nOffset = 0;
1244 138 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1245 : {
1246 90 : auto &poFeature = apoFeatures[iFeat];
1247 90 : const auto psRawField = poFeature->GetRawFieldRef(i);
1248 90 : if (IsValidField(psRawField))
1249 : {
1250 59 : const int nCount = GetFromList::getCount(psRawField);
1251 59 : const auto paList = GetFromList::getValues(psRawField);
1252 :
1253 373 : for (int j = 0; j < nCount; ++j)
1254 : {
1255 314 : if (paList[j])
1256 55 : SetBit(panValues, nOffset + j);
1257 : }
1258 59 : nOffset += static_cast<OffsetType>(nCount);
1259 : }
1260 : }
1261 :
1262 48 : return nFeatCount;
1263 : }
1264 :
1265 : /************************************************************************/
1266 : /* FillStringArray() */
1267 : /************************************************************************/
1268 :
1269 : template <class T>
1270 : static size_t
1271 3767 : FillStringArray(struct ArrowArray *psChild,
1272 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1273 : const size_t nFeatureCountLimit, const bool bIsNullable,
1274 : const int i, const size_t nMemLimit)
1275 : {
1276 3767 : psChild->n_buffers = 3;
1277 3767 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1278 3767 : uint8_t *pabyValidity = nullptr;
1279 : T *panOffsets = static_cast<T *>(
1280 3767 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1281 3767 : if (panOffsets == nullptr)
1282 0 : return 0;
1283 3767 : psChild->buffers[1] = panOffsets;
1284 :
1285 3767 : size_t nOffset = 0;
1286 3767 : size_t nFeatCount = 0;
1287 33969 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1288 : {
1289 30222 : panOffsets[iFeat] = static_cast<T>(nOffset);
1290 30222 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1291 30222 : if (IsValidField(psRawField))
1292 : {
1293 26882 : const size_t nLen = strlen(psRawField->String);
1294 26882 : if (nLen > nMemLimit - nOffset)
1295 : {
1296 20 : if (nFeatCount == 0)
1297 19 : return 0;
1298 1 : break;
1299 : }
1300 26862 : nOffset += static_cast<T>(nLen);
1301 : }
1302 3340 : else if (bIsNullable)
1303 : {
1304 3340 : ++psChild->null_count;
1305 3340 : if (pabyValidity == nullptr)
1306 : {
1307 1131 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1308 1131 : psChild->buffers[0] = pabyValidity;
1309 1131 : if (pabyValidity == nullptr)
1310 0 : return 0;
1311 : }
1312 3340 : UnsetBit(pabyValidity, iFeat);
1313 : }
1314 : }
1315 3748 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1316 :
1317 : char *pachValues =
1318 3748 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1319 3748 : if (pachValues == nullptr)
1320 0 : return 0;
1321 3748 : psChild->buffers[2] = pachValues;
1322 :
1323 3748 : nOffset = 0;
1324 33950 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1325 : {
1326 30202 : const size_t nLen =
1327 30202 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1328 30202 : if (nLen)
1329 : {
1330 25310 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1331 25310 : memcpy(pachValues + nOffset, psRawField->String, nLen);
1332 25310 : nOffset += nLen;
1333 : }
1334 : }
1335 :
1336 3748 : return nFeatCount;
1337 : }
1338 :
1339 : /************************************************************************/
1340 : /* FillStringListArray() */
1341 : /************************************************************************/
1342 :
1343 : template <class OffsetType>
1344 : static size_t
1345 203 : FillStringListArray(struct ArrowArray *psChild,
1346 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1347 : const size_t nFeatureCountLimit, const bool bIsNullable,
1348 : const int i, const size_t nMemLimit)
1349 : {
1350 203 : psChild->n_buffers = 2;
1351 203 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1352 203 : uint8_t *pabyValidity = nullptr;
1353 : OffsetType *panOffsets =
1354 203 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1355 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1356 203 : if (panOffsets == nullptr)
1357 0 : return false;
1358 203 : psChild->buffers[1] = panOffsets;
1359 :
1360 203 : OffsetType nStrings = 0;
1361 203 : OffsetType nCountChars = 0;
1362 203 : size_t nFeatCount = 0;
1363 516 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1364 : {
1365 315 : panOffsets[iFeat] = nStrings;
1366 315 : auto &poFeature = apoFeatures[iFeat];
1367 315 : const auto psRawField = poFeature->GetRawFieldRef(i);
1368 315 : if (IsValidField(psRawField))
1369 : {
1370 108 : const int nCount = psRawField->StringList.nCount;
1371 108 : if (static_cast<size_t>(nCount) >
1372 108 : static_cast<size_t>(nMemLimit - nStrings))
1373 : {
1374 0 : if (nFeatCount == 0)
1375 0 : return 0;
1376 0 : goto after_loop;
1377 : }
1378 280 : for (int j = 0; j < nCount; ++j)
1379 : {
1380 174 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1381 174 : if (nLen > static_cast<size_t>(nMemLimit - nCountChars))
1382 : {
1383 2 : if (nFeatCount == 0)
1384 1 : return 0;
1385 1 : goto after_loop;
1386 : }
1387 172 : nCountChars += static_cast<OffsetType>(nLen);
1388 : }
1389 106 : nStrings += static_cast<OffsetType>(nCount);
1390 : }
1391 207 : else if (bIsNullable)
1392 : {
1393 207 : ++psChild->null_count;
1394 207 : if (pabyValidity == nullptr)
1395 : {
1396 152 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1397 152 : psChild->buffers[0] = pabyValidity;
1398 152 : if (pabyValidity == nullptr)
1399 0 : return 0;
1400 : }
1401 207 : UnsetBit(pabyValidity, iFeat);
1402 : }
1403 : }
1404 201 : after_loop:
1405 202 : panOffsets[nFeatCount] = nStrings;
1406 :
1407 202 : psChild->n_children = 1;
1408 202 : psChild->children = static_cast<struct ArrowArray **>(
1409 202 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1410 404 : psChild->children[0] = static_cast<struct ArrowArray *>(
1411 202 : CPLCalloc(1, sizeof(struct ArrowArray)));
1412 202 : auto psValueChild = psChild->children[0];
1413 :
1414 202 : psValueChild->release = OGRLayerDefaultReleaseArray;
1415 202 : psValueChild->length = nStrings;
1416 202 : psValueChild->n_buffers = 3;
1417 202 : psValueChild->buffers =
1418 202 : static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1419 :
1420 : OffsetType *panChildOffsets = static_cast<OffsetType *>(
1421 202 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(OffsetType) * (1 + nStrings)));
1422 202 : if (panChildOffsets == nullptr)
1423 0 : return 0;
1424 202 : psValueChild->buffers[1] = panChildOffsets;
1425 :
1426 : char *pachValues =
1427 202 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars + 1));
1428 202 : if (pachValues == nullptr)
1429 0 : return 0;
1430 202 : psValueChild->buffers[2] = pachValues;
1431 :
1432 202 : nStrings = 0;
1433 202 : nCountChars = 0;
1434 515 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1435 : {
1436 313 : auto &poFeature = apoFeatures[iFeat];
1437 313 : const auto psRawField = poFeature->GetRawFieldRef(i);
1438 313 : if (IsValidField(psRawField))
1439 : {
1440 106 : const int nCount = psRawField->StringList.nCount;
1441 278 : for (int j = 0; j < nCount; ++j)
1442 : {
1443 172 : panChildOffsets[nStrings] = nCountChars;
1444 172 : ++nStrings;
1445 172 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1446 172 : memcpy(pachValues + nCountChars,
1447 172 : psRawField->StringList.paList[j], nLen);
1448 172 : nCountChars += static_cast<OffsetType>(nLen);
1449 : }
1450 : }
1451 : }
1452 202 : panChildOffsets[nStrings] = nCountChars;
1453 :
1454 202 : return nFeatCount;
1455 : }
1456 :
1457 : /************************************************************************/
1458 : /* FillBinaryArray() */
1459 : /************************************************************************/
1460 :
1461 : template <class T>
1462 : static size_t
1463 905 : FillBinaryArray(struct ArrowArray *psChild,
1464 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1465 : const size_t nFeatureCountLimit, const bool bIsNullable,
1466 : const int i, const size_t nMemLimit)
1467 : {
1468 905 : psChild->n_buffers = 3;
1469 905 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1470 905 : uint8_t *pabyValidity = nullptr;
1471 : T *panOffsets = static_cast<T *>(
1472 905 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1473 905 : if (panOffsets == nullptr)
1474 0 : return 0;
1475 905 : psChild->buffers[1] = panOffsets;
1476 :
1477 905 : T nOffset = 0;
1478 905 : size_t nFeatCount = 0;
1479 4362 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1480 : {
1481 3459 : panOffsets[iFeat] = nOffset;
1482 3459 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1483 3459 : if (IsValidField(psRawField))
1484 : {
1485 3402 : const size_t nLen = psRawField->Binary.nCount;
1486 3402 : if (nLen > static_cast<size_t>(nMemLimit - nOffset))
1487 : {
1488 2 : if (iFeat == 0)
1489 1 : return 0;
1490 1 : break;
1491 : }
1492 3400 : nOffset += static_cast<T>(nLen);
1493 : }
1494 57 : else if (bIsNullable)
1495 : {
1496 57 : ++psChild->null_count;
1497 57 : if (pabyValidity == nullptr)
1498 : {
1499 49 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1500 49 : psChild->buffers[0] = pabyValidity;
1501 49 : if (pabyValidity == nullptr)
1502 0 : return 0;
1503 : }
1504 57 : UnsetBit(pabyValidity, iFeat);
1505 : }
1506 : }
1507 904 : panOffsets[nFeatCount] = nOffset;
1508 :
1509 : GByte *pabyValues =
1510 904 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1511 904 : if (pabyValues == nullptr)
1512 0 : return 0;
1513 904 : psChild->buffers[2] = pabyValues;
1514 :
1515 904 : nOffset = 0;
1516 4361 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1517 : {
1518 3457 : const size_t nLen =
1519 3457 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1520 3457 : if (nLen)
1521 : {
1522 3400 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1523 3400 : memcpy(pabyValues + nOffset, psRawField->Binary.paData, nLen);
1524 3400 : nOffset += static_cast<T>(nLen);
1525 : }
1526 : }
1527 :
1528 904 : return nFeatCount;
1529 : }
1530 :
1531 : /************************************************************************/
1532 : /* FillFixedWidthBinaryArray() */
1533 : /************************************************************************/
1534 :
1535 : static bool
1536 8 : FillFixedWidthBinaryArray(struct ArrowArray *psChild,
1537 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1538 : const size_t nFeatureCountLimit,
1539 : const bool bIsNullable, const int nWidth, const int i)
1540 : {
1541 8 : psChild->n_buffers = 2;
1542 8 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1543 8 : uint8_t *pabyValidity = nullptr;
1544 :
1545 8 : assert(nFeatureCountLimit + 1 <=
1546 : std::numeric_limits<size_t>::max() / nWidth);
1547 : GByte *pabyValues = static_cast<GByte *>(
1548 8 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 1) * nWidth));
1549 8 : if (pabyValues == nullptr)
1550 0 : return false;
1551 8 : psChild->buffers[1] = pabyValues;
1552 :
1553 29 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1554 : {
1555 21 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1556 21 : if (IsValidField(psRawField))
1557 : {
1558 20 : const auto nLen = psRawField->Binary.nCount;
1559 20 : if (nLen < nWidth)
1560 : {
1561 0 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1562 : nLen);
1563 0 : memset(pabyValues + iFeat * nWidth + nLen, 0, nWidth - nLen);
1564 : }
1565 : else
1566 : {
1567 20 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1568 : nWidth);
1569 : }
1570 : }
1571 : else
1572 : {
1573 1 : memset(pabyValues + iFeat * nWidth, 0, nWidth);
1574 1 : if (bIsNullable)
1575 : {
1576 1 : ++psChild->null_count;
1577 1 : if (pabyValidity == nullptr)
1578 : {
1579 1 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1580 1 : psChild->buffers[0] = pabyValidity;
1581 1 : if (pabyValidity == nullptr)
1582 0 : return false;
1583 : }
1584 1 : UnsetBit(pabyValidity, iFeat);
1585 : }
1586 : }
1587 : }
1588 :
1589 8 : return true;
1590 : }
1591 :
1592 : /************************************************************************/
1593 : /* FillWKBGeometryArray() */
1594 : /************************************************************************/
1595 :
1596 : template <class T>
1597 : static size_t
1598 1245 : FillWKBGeometryArray(struct ArrowArray *psChild,
1599 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1600 : const size_t nFeatureCountLimit,
1601 : const OGRGeomFieldDefn *poFieldDefn, const int i,
1602 : const size_t nMemLimit)
1603 : {
1604 1245 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
1605 1245 : psChild->n_buffers = 3;
1606 1245 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1607 1245 : uint8_t *pabyValidity = nullptr;
1608 : T *panOffsets = static_cast<T *>(
1609 1245 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1610 1245 : if (panOffsets == nullptr)
1611 0 : return 0;
1612 1245 : psChild->buffers[1] = panOffsets;
1613 1245 : const auto eGeomType = poFieldDefn->GetType();
1614 3735 : auto poEmptyGeom =
1615 : std::unique_ptr<OGRGeometry>(OGRGeometryFactory::createGeometry(
1616 1245 : (eGeomType == wkbNone || wkbFlatten(eGeomType) == wkbUnknown)
1617 : ? wkbGeometryCollection
1618 : : eGeomType));
1619 :
1620 1245 : size_t nOffset = 0;
1621 1245 : size_t nFeatCount = 0;
1622 14365 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1623 : {
1624 13121 : panOffsets[iFeat] = static_cast<T>(nOffset);
1625 13121 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1626 13121 : if (poGeom != nullptr)
1627 : {
1628 12546 : const size_t nLen = poGeom->WkbSize();
1629 12546 : if (nLen > nMemLimit - nOffset)
1630 : {
1631 1 : if (nFeatCount == 0)
1632 0 : return 0;
1633 1 : break;
1634 : }
1635 12545 : nOffset += static_cast<T>(nLen);
1636 : }
1637 575 : else if (bIsNullable)
1638 : {
1639 575 : ++psChild->null_count;
1640 575 : if (pabyValidity == nullptr)
1641 : {
1642 277 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1643 277 : psChild->buffers[0] = pabyValidity;
1644 277 : if (pabyValidity == nullptr)
1645 0 : return 0;
1646 : }
1647 575 : UnsetBit(pabyValidity, iFeat);
1648 : }
1649 0 : else if (poEmptyGeom)
1650 : {
1651 0 : const size_t nLen = poEmptyGeom->WkbSize();
1652 0 : if (nLen > nMemLimit - nOffset)
1653 : {
1654 0 : if (nFeatCount == 0)
1655 0 : return 0;
1656 0 : break;
1657 : }
1658 0 : nOffset += static_cast<T>(nLen);
1659 : }
1660 : }
1661 1245 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1662 :
1663 : GByte *pabyValues =
1664 1245 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1665 1245 : if (pabyValues == nullptr)
1666 0 : return 0;
1667 1245 : psChild->buffers[2] = pabyValues;
1668 :
1669 1245 : nOffset = 0;
1670 14365 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1671 : {
1672 13120 : const size_t nLen =
1673 13120 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1674 13120 : if (nLen)
1675 : {
1676 12545 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1677 12545 : poGeom->exportToWkb(wkbNDR, pabyValues + nOffset, wkbVariantIso);
1678 12545 : nOffset += nLen;
1679 : }
1680 575 : else if (!bIsNullable && poEmptyGeom)
1681 : {
1682 0 : poEmptyGeom->exportToWkb(wkbNDR, pabyValues + nOffset,
1683 : wkbVariantIso);
1684 0 : nOffset += nLen;
1685 : }
1686 : }
1687 :
1688 1245 : return nFeatCount;
1689 : }
1690 :
1691 : /************************************************************************/
1692 : /* FillDateArray() */
1693 : /************************************************************************/
1694 :
1695 125 : static bool FillDateArray(struct ArrowArray *psChild,
1696 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1697 : const size_t nFeatureCountLimit,
1698 : const bool bIsNullable, const int i)
1699 : {
1700 125 : psChild->n_buffers = 2;
1701 125 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1702 125 : uint8_t *pabyValidity = nullptr;
1703 125 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1704 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1705 125 : if (panValues == nullptr)
1706 0 : return false;
1707 125 : psChild->buffers[1] = panValues;
1708 475 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1709 : {
1710 350 : auto &poFeature = apoFeatures[iFeat];
1711 350 : const auto psRawField = poFeature->GetRawFieldRef(i);
1712 350 : if (IsValidField(psRawField))
1713 : {
1714 : struct tm brokenDown;
1715 262 : memset(&brokenDown, 0, sizeof(brokenDown));
1716 262 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1717 262 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1718 262 : brokenDown.tm_mday = psRawField->Date.Day;
1719 262 : panValues[iFeat] =
1720 262 : static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
1721 : }
1722 88 : else if (bIsNullable)
1723 : {
1724 88 : panValues[iFeat] = 0;
1725 88 : ++psChild->null_count;
1726 88 : if (pabyValidity == nullptr)
1727 : {
1728 61 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1729 61 : psChild->buffers[0] = pabyValidity;
1730 61 : if (pabyValidity == nullptr)
1731 0 : return false;
1732 : }
1733 88 : UnsetBit(pabyValidity, iFeat);
1734 : }
1735 : else
1736 : {
1737 0 : panValues[iFeat] = 0;
1738 : }
1739 : }
1740 125 : return true;
1741 : }
1742 :
1743 : /************************************************************************/
1744 : /* FillTimeArray() */
1745 : /************************************************************************/
1746 :
1747 72 : static bool FillTimeArray(struct ArrowArray *psChild,
1748 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1749 : const size_t nFeatureCountLimit,
1750 : const bool bIsNullable, const int i)
1751 : {
1752 72 : psChild->n_buffers = 2;
1753 72 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1754 72 : uint8_t *pabyValidity = nullptr;
1755 72 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1756 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1757 72 : if (panValues == nullptr)
1758 0 : return false;
1759 72 : psChild->buffers[1] = panValues;
1760 667 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1761 : {
1762 595 : auto &poFeature = apoFeatures[iFeat];
1763 595 : const auto psRawField = poFeature->GetRawFieldRef(i);
1764 595 : if (IsValidField(psRawField))
1765 : {
1766 548 : panValues[iFeat] =
1767 548 : psRawField->Date.Hour * 3600000 +
1768 548 : psRawField->Date.Minute * 60000 +
1769 548 : static_cast<int>(psRawField->Date.Second * 1000 + 0.5f);
1770 : }
1771 47 : else if (bIsNullable)
1772 : {
1773 47 : panValues[iFeat] = 0;
1774 47 : ++psChild->null_count;
1775 47 : if (pabyValidity == nullptr)
1776 : {
1777 39 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1778 39 : psChild->buffers[0] = pabyValidity;
1779 39 : if (pabyValidity == nullptr)
1780 0 : return false;
1781 : }
1782 47 : UnsetBit(pabyValidity, iFeat);
1783 : }
1784 : else
1785 : {
1786 0 : panValues[iFeat] = 0;
1787 : }
1788 : }
1789 72 : return true;
1790 : }
1791 :
1792 : /************************************************************************/
1793 : /* FillDateTimeArray() */
1794 : /************************************************************************/
1795 :
/** Fill psChild with the values of DateTime field i taken from the first
 * nFeatureCountLimit features of apoFeatures.
 *
 * Two buffers are attached to psChild:
 * - buffers[0]: validity bitmap, only allocated when the first non-valid value
 *   of a nullable field is met (it stays null when every value is valid).
 * - buffers[1]: array of int64_t, one entry per feature.
 *
 * A valid value is stored as CPLYMDHMSToUnixTime() * 1000 plus the millisecond
 * part of the seconds (presumably milliseconds since the Unix epoch -- confirm
 * against CPLYMDHMSToUnixTime()). When nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ and
 * the TZFlag of the value is > OGR_TZFLAG_MIXED_TZ, the offset encoded in that
 * TZFlag (15 minute steps relative to OGR_TZFLAG_UTC) is subtracted.
 * Non-valid values are stored as 0.
 *
 * @param psChild            Array to fill.
 * @param apoFeatures        Features to read from.
 * @param nFeatureCountLimit Number of features of apoFeatures to process.
 * @param bIsNullable        Whether non-valid values must be flagged as null.
 * @param i                  Index of the field, as passed to GetRawFieldRef().
 * @param nFieldTZFlag       Timezone flag applying to the whole field.
 * @return true on success, false when a buffer allocation failed (buffers
 *         already attached to psChild are not freed here).
 */
1796 : static bool
1797 712 : FillDateTimeArray(struct ArrowArray *psChild,
1798 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1799 : const size_t nFeatureCountLimit, const bool bIsNullable,
1800 : const int i, int nFieldTZFlag)
1801 : {
1802 712 : psChild->n_buffers = 2;
1803 712 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1804 712 : uint8_t *pabyValidity = nullptr;
1805 712 : int64_t *panValues = static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1806 : sizeof(int64_t) * (nFeatureCountLimit + 1)));
1807 712 : if (panValues == nullptr)
1808 0 : return false;
1809 712 : psChild->buffers[1] = panValues;
// Zero-initialized once; only the six date/time members below are refreshed
// for each feature.
1810 : struct tm brokenDown;
1811 712 : memset(&brokenDown, 0, sizeof(brokenDown));
1812 3141 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1813 : {
1814 2429 : auto &poFeature = apoFeatures[iFeat];
1815 2429 : const auto psRawField = poFeature->GetRawFieldRef(i);
1816 2429 : if (IsValidField(psRawField))
1817 : {
1818 1670 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1819 1670 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1820 1670 : brokenDown.tm_mday = psRawField->Date.Day;
1821 1670 : brokenDown.tm_hour = psRawField->Date.Hour;
1822 1670 : brokenDown.tm_min = psRawField->Date.Minute;
// Whole seconds only (truncation); the fractional part is added below as
// milliseconds.
1823 1670 : brokenDown.tm_sec = static_cast<int>(psRawField->Date.Second);
// NOTE(review): the millisecond part is rounded and then taken modulo 1000,
// while tm_sec above is truncated. A Second value less than 0.5 ms below the
// next whole second therefore yields 0 ms without carrying into the seconds.
// Confirm whether this matters for callers.
1824 : auto nVal =
1825 1670 : CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
1826 1670 : (static_cast<int>(psRawField->Date.Second * 1000 + 0.5f) %
1827 1670 : 1000);
1828 1670 : if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
1829 65 : psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1830 : {
1831 : // Convert for psRawField->Date.TZFlag to UTC
// TZFlag is expressed in steps of 15 minutes relative to OGR_TZFLAG_UTC.
1832 65 : const int TZOffset =
1833 65 : (psRawField->Date.TZFlag - OGR_TZFLAG_UTC) * 15;
1834 65 : const int TZOffsetMS = TZOffset * 60 * 1000;
1835 65 : nVal -= TZOffsetMS;
1836 : }
1837 1670 : panValues[iFeat] = nVal;
1838 : }
1839 759 : else if (bIsNullable)
1840 : {
// Null value: store a placeholder and clear the matching validity bit,
// allocating the bitmap on first use.
1841 759 : panValues[iFeat] = 0;
1842 759 : ++psChild->null_count;
1843 759 : if (pabyValidity == nullptr)
1844 : {
1845 261 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1846 261 : psChild->buffers[0] = pabyValidity;
1847 261 : if (pabyValidity == nullptr)
1848 0 : return false;
1849 : }
1850 759 : UnsetBit(pabyValidity, iFeat);
1851 : }
1852 : else
1853 : {
// Non-nullable field without a valid value: emit 0.
1854 0 : panValues[iFeat] = 0;
1855 : }
1856 : }
1857 712 : return true;
1858 : }
1859 :
1860 : /************************************************************************/
1861 : /* FillDateTimeArrayWithTimeZone() */
1862 : /************************************************************************/
1863 :
/** Fill psChild as a two-children array holding, for DateTime field i, a
 * timestamp and the offset to UTC of each of the first nFeatureCountLimit
 * features of apoFeatures.
 *
 * Layout built by this function:
 * - psChild: 1 buffer (validity bitmap, only allocated when the first
 *   non-valid value of a nullable field is met) and 2 children.
 * - children[0]: 2 buffers, buffers[1] being an int64_t array of timestamps
 *   computed as CPLYMDHMSToUnixTime() * 1000 plus the millisecond part
 *   (presumably milliseconds since the Unix epoch -- confirm), shifted to UTC
 *   when a timezone offset is known.
 * - children[1]: 2 buffers, buffers[1] being an int16_t array of offsets to
 *   UTC in minutes.
 *
 * Both children get OGRLayerDefaultReleaseArray as release callback. Their
 * length member is not set here. Entries of both value arrays default to 0,
 * which is what non-valid values end up with. Nulls are only recorded on
 * psChild (null_count and validity bitmap), not on the children.
 *
 * @param psChild            Array to fill.
 * @param apoFeatures        Features to read from.
 * @param nFeatureCountLimit Number of features of apoFeatures to process.
 * @param bIsNullable        Whether non-valid values must be flagged as null.
 * @param i                  Index of the field, as passed to GetRawFieldRef().
 * @param nFieldTZFlag       Timezone flag applying to the whole field.
 * @return true on success, false when a buffer allocation failed (what was
 *         already attached to psChild is not freed here).
 */
1864 3 : static bool FillDateTimeArrayWithTimeZone(
1865 : struct ArrowArray *psChild,
1866 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1867 : const size_t nFeatureCountLimit, const bool bIsNullable, const int i,
1868 : int nFieldTZFlag)
1869 : {
1870 3 : psChild->n_children = 2;
1871 3 : psChild->children = static_cast<struct ArrowArray **>(
1872 3 : CPLCalloc(2, sizeof(struct ArrowArray *)));
1873 3 : psChild->n_buffers = 1;
1874 3 : psChild->buffers = static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
1875 3 : uint8_t *pabyValidity = nullptr;
1876 :
1877 : // Create sub-array for timestamp in UTC
1878 6 : psChild->children[0] = static_cast<struct ArrowArray *>(
1879 3 : CPLCalloc(1, sizeof(struct ArrowArray)));
1880 3 : psChild->children[0]->n_buffers = 2;
1881 6 : psChild->children[0]->buffers =
1882 3 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1883 3 : psChild->children[0]->release = OGRLayerDefaultReleaseArray;
1884 : int64_t *panTimestamps = static_cast<int64_t *>(
1885 3 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int64_t) * nFeatureCountLimit));
// On failure children[1] is still null while n_children is already 2.
1886 3 : if (panTimestamps == nullptr)
1887 0 : return false;
1888 3 : psChild->children[0]->buffers[1] = panTimestamps;
1889 :
1890 : // Create sub-array for offset to UTC in minutes
1891 6 : psChild->children[1] = static_cast<struct ArrowArray *>(
1892 3 : CPLCalloc(1, sizeof(struct ArrowArray)));
1893 3 : psChild->children[1]->n_buffers = 2;
1894 6 : psChild->children[1]->buffers =
1895 3 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1896 3 : psChild->children[1]->release = OGRLayerDefaultReleaseArray;
1897 : int16_t *panOffsetsMinutes = static_cast<int16_t *>(
1898 3 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int16_t) * nFeatureCountLimit));
1899 3 : if (panOffsetsMinutes == nullptr)
1900 0 : return false;
1901 3 : psChild->children[1]->buffers[1] = panOffsetsMinutes;
1902 :
// Zero-initialized once; only the six date/time members below are refreshed
// for each feature.
1903 : struct tm brokenDown;
1904 3 : memset(&brokenDown, 0, sizeof(brokenDown));
1905 :
1906 15 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1907 : {
1908 12 : auto &poFeature = apoFeatures[iFeat];
1909 12 : const auto psRawField = poFeature->GetRawFieldRef(i);
// Defaults kept for non-valid values and for values without a usable
// timezone offset.
1910 12 : panTimestamps[iFeat] = 0;
1911 12 : panOffsetsMinutes[iFeat] = 0;
1912 12 : if (IsValidField(psRawField))
1913 : {
1914 9 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1915 9 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1916 9 : brokenDown.tm_mday = psRawField->Date.Day;
1917 9 : brokenDown.tm_hour = psRawField->Date.Hour;
1918 9 : brokenDown.tm_min = psRawField->Date.Minute;
1919 9 : brokenDown.tm_sec = static_cast<int>(psRawField->Date.Second);
// NOTE(review): the millisecond part is rounded and then taken modulo 1000,
// while tm_sec above is truncated. A Second value less than 0.5 ms below the
// next whole second therefore yields 0 ms without carrying into the seconds.
// Confirm whether this matters for callers.
1920 : auto nVal =
1921 9 : CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
1922 9 : (static_cast<int>(psRawField->Date.Second * 1000 + 0.5f) %
1923 9 : 1000);
1924 9 : if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
1925 9 : psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1926 : {
1927 : // Convert for psRawField->Date.TZFlag to UTC
// TZFlag is expressed in steps of 15 minutes relative to OGR_TZFLAG_UTC.
1928 6 : const int TZOffsetMinute =
1929 6 : (psRawField->Date.TZFlag - OGR_TZFLAG_UTC) * 15;
1930 6 : const int TZOffsetMS = TZOffsetMinute * 60 * 1000;
1931 6 : nVal -= TZOffsetMS;
1932 :
1933 6 : panOffsetsMinutes[iFeat] = static_cast<int16_t>(TZOffsetMinute);
1934 : }
1935 9 : panTimestamps[iFeat] = nVal;
1936 : }
1937 3 : else if (bIsNullable)
1938 : {
// Null value: clear the matching bit of the parent validity bitmap,
// allocating the bitmap on first use.
1939 3 : ++psChild->null_count;
1940 3 : if (pabyValidity == nullptr)
1941 : {
1942 3 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1943 3 : psChild->buffers[0] = pabyValidity;
1944 3 : if (pabyValidity == nullptr)
1945 0 : return false;
1946 : }
1947 3 : UnsetBit(pabyValidity, iFeat);
1948 : }
1949 : }
1950 3 : return true;
1951 : }
1952 :
1953 : /************************************************************************/
1954 : /* FillDateTimeArrayAsString() */
1955 : /************************************************************************/
1956 :
/** Fill psChild with the string representation of DateTime field i of the
 * first features of apoFeatures, stopping before nMemLimit bytes of string
 * data would be exceeded.
 *
 * Three buffers are attached to psChild:
 * - buffers[0]: validity bitmap, only allocated when the first non-valid value
 *   of a nullable field is met.
 * - buffers[1]: array of 1 + nFeatureCountLimit uint32_t offsets into the
 *   string data.
 * - buffers[2]: the string data.
 *
 * The function works in two passes: the first one computes the size reserved
 * for each value (and thus the offsets), the second one formats the values
 * with OGRGetISO8601DateTime() into the string data buffer.
 *
 * @param psChild            Array to fill.
 * @param apoFeatures        Features to read from.
 * @param nFeatureCountLimit Maximum number of features to process.
 * @param bIsNullable        Whether non-valid values must be flagged as null.
 * @param i                  Index of the field, as passed to GetRawFieldRef().
 * @param nMemLimit          Maximum size in bytes of the string data.
 * @return the number of features actually written, which may be lower than
 *         nFeatureCountLimit, or 0 when a buffer allocation failed or when
 *         the first feature alone does not fit within nMemLimit.
 */
1957 : static size_t
1958 9 : FillDateTimeArrayAsString(struct ArrowArray *psChild,
1959 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1960 : const size_t nFeatureCountLimit,
1961 : const bool bIsNullable, const int i,
1962 : const size_t nMemLimit)
1963 : {
1964 9 : psChild->n_buffers = 3;
1965 9 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1966 9 : uint8_t *pabyValidity = nullptr;
1967 : using T = uint32_t;
1968 : T *panOffsets = static_cast<T *>(
1969 9 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1970 9 : if (panOffsets == nullptr)
1971 0 : return 0;
1972 9 : psChild->buffers[1] = panOffsets;
1973 :
// First pass: compute the start offset of each value and the total size of
// the string data.
1974 9 : size_t nOffset = 0;
1975 9 : size_t nFeatCount = 0;
1976 51 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1977 : {
1978 42 : panOffsets[iFeat] = static_cast<T>(nOffset);
1979 42 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1980 42 : if (IsValidField(psRawField))
1981 : {
// Size to reserve: 19 characters, plus 4 when the seconds have a fractional
// part, plus 1 ('Z') for UTC or 6 for another known timezone.
1982 39 : size_t nLen = strlen("YYYY-MM-DDTHH:MM:SS");
1983 39 : if (fmodf(psRawField->Date.Second, 1.0f) != 0)
1984 27 : nLen += strlen(".sss");
1985 39 : if (psRawField->Date.TZFlag == OGR_TZFLAG_UTC)
1986 7 : nLen += 1; // 'Z'
1987 32 : else if (psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1988 14 : nLen += strlen("+hh:mm");
// Stop before exceeding the memory limit; give up entirely if not even the
// first feature fits.
1989 39 : if (nLen > nMemLimit - nOffset)
1990 : {
1991 0 : if (nFeatCount == 0)
1992 0 : return 0;
1993 0 : break;
1994 : }
1995 39 : nOffset += static_cast<T>(nLen);
1996 : }
1997 3 : else if (bIsNullable)
1998 : {
// Null value: it occupies no string data; clear the matching validity bit,
// allocating the bitmap on first use.
1999 3 : ++psChild->null_count;
2000 3 : if (pabyValidity == nullptr)
2001 : {
2002 3 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
2003 3 : psChild->buffers[0] = pabyValidity;
2004 3 : if (pabyValidity == nullptr)
2005 0 : return 0;
2006 : }
2007 3 : UnsetBit(pabyValidity, iFeat);
2008 : }
2009 : }
// Final offset, i.e. end of the last value.
2010 9 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
2011 :
2012 : char *pachValues =
2013 9 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
2014 9 : if (pachValues == nullptr)
2015 0 : return 0;
2016 9 : psChild->buffers[2] = pachValues;
2017 :
// Second pass: format each value at the offset computed above.
2018 9 : nOffset = 0;
2019 : char szBuffer[OGR_SIZEOF_ISO8601_DATETIME_BUFFER];
2020 : OGRISO8601Format sFormat;
2021 9 : sFormat.ePrecision = OGRISO8601Precision::AUTO;
2022 51 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
2023 : {
2024 42 : const int nLen =
2025 42 : static_cast<int>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
// A zero length means that no space was reserved for this feature in the
// first pass (non-valid value).
2026 42 : if (nLen)
2027 : {
2028 39 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
2029 39 : int nBufSize = OGRGetISO8601DateTime(psRawField, sFormat, szBuffer);
// Copy at most the reserved size, and pad with zero bytes when the formatted
// string is shorter than what was reserved.
2030 39 : if (nBufSize)
2031 : {
2032 39 : memcpy(pachValues + nOffset, szBuffer,
2033 39 : std::min(nLen, nBufSize));
2034 : }
2035 39 : if (nBufSize < nLen)
2036 : {
2037 5 : memset(pachValues + nOffset + nBufSize, 0, nLen - nBufSize);
2038 : }
2039 39 : nOffset += nLen;
2040 : }
2041 : }
2042 :
2043 9 : return nFeatCount;
2044 : }
2045 :
2046 : /************************************************************************/
2047 : /* GetNextArrowArray() */
2048 : /************************************************************************/
2049 :
2050 : /** Default implementation of the ArrowArrayStream::get_next() callback.
2051 : *
2052 : * To be used by driver implementations that have a custom GetArrowStream()
2053 : * implementation.
2054 : *
 * Features are accumulated in the feature queue of the shared private data,
 * up to MAX_FEATURES_IN_BATCH of them, either with GetFeature() when a list
 * of queried FIDs is available, or with GetNextFeature() otherwise. They are
 * then converted into one child array per non-ignored attribute and geometry
 * field, preceded by a FID column when INCLUDE_FID is enabled.
 *
 * Fewer features than queued may be returned when a variable-size column
 * reports that it could only hold some of them; the remaining features stay
 * in the queue for the next call.
 *
 * @param stream Stream whose private_data is a
 *               ArrowArrayStreamPrivateDataSharedDataWrapper.
 * @param out_array Array to fill. It is zero-initialized first, and left
 *                  zeroed (release member set to null) when there is no
 *                  feature left or when an error occurs.
 * @return 0 on success or end of iteration, ENOMEM on error.
 *
2055 : * @since GDAL 3.6
2056 : */
2057 3592 : int OGRLayer::GetNextArrowArray(struct ArrowArrayStream *stream,
2058 : struct ArrowArray *out_array)
2059 : {
2060 3592 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
2061 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2062 : stream->private_data);
2063 :
// Options driving this default implementation.
2064 3592 : const bool bIncludeFID = CPLTestBool(
2065 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
2066 3592 : const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
2067 : GAS_OPT_DATETIME_AS_STRING, false);
2068 3592 : int nMaxBatchSize = atoi(m_aosArrowArrayStreamOptions.FetchNameValueDef(
2069 : "MAX_FEATURES_IN_BATCH", "65536"));
// Clamp the batch size to [1, INT_MAX - 1].
2070 3592 : if (nMaxBatchSize <= 0)
2071 0 : nMaxBatchSize = 1;
2072 3592 : if (nMaxBatchSize > INT_MAX - 1)
2073 0 : nMaxBatchSize = INT_MAX - 1;
2074 :
2075 : auto &oFeatureQueue =
2076 3592 : m_poSharedArrowArrayStreamPrivateData->m_oFeatureQueue;
2077 :
2078 3592 : memset(out_array, 0, sizeof(*out_array));
2079 :
2080 3592 : auto poLayerDefn = GetLayerDefn();
2081 3592 : const int nFieldCount = poLayerDefn->GetFieldCount();
2082 3592 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
// Upper bound of the number of children: ignored fields are skipped below.
2083 3592 : const int nMaxChildren =
2084 3592 : (bIncludeFID ? 1 : 0) + nFieldCount + nGeomFieldCount;
2085 3592 : int iSchemaChild = 0;
2086 :
// Case where a list of FIDs to fetch is available: fetch them one at a time
// with GetFeature(), keeping only features accepted by FilterGeometry() when
// m_poFilterGeom is set.
2087 3592 : if (!m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.empty())
2088 : {
2089 6 : if (poPrivate->poShared->m_bEOF)
2090 : {
2091 2 : return 0;
2092 : }
2093 4 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS == 0)
2094 : {
2095 4 : CPLDebug("OGR", "Using fast FID filtering");
2096 : }
2097 8 : while (
2098 24 : oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize) &&
2099 12 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS <
2100 12 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
2101 : {
2102 : const auto nFID =
2103 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
2104 8 : [m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS];
2105 16 : auto poFeature = std::unique_ptr<OGRFeature>(GetFeature(nFID));
2106 8 : ++m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS;
2107 8 : if (poFeature && (m_poFilterGeom == nullptr ||
2108 0 : FilterGeometry(poFeature->GetGeomFieldRef(
2109 8 : m_iGeomFieldFilter))))
2110 : {
2111 4 : oFeatureQueue.emplace_back(std::move(poFeature));
2112 : }
2113 : }
// All queried FIDs have been consumed: the next call will return no array.
2114 8 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS ==
2115 4 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
2116 : {
2117 4 : poPrivate->poShared->m_bEOF = true;
2118 : }
2119 : }
// General case: pull features with GetNextFeature() until the batch is full
// or no more feature is returned.
2120 3586 : else if (!poPrivate->poShared->m_bEOF)
2121 : {
2122 18893 : while (oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize))
2123 : {
2124 18890 : auto poFeature = std::unique_ptr<OGRFeature>(GetNextFeature());
2125 18890 : if (!poFeature)
2126 : {
2127 1847 : poPrivate->poShared->m_bEOF = true;
2128 1847 : break;
2129 : }
2130 17043 : oFeatureQueue.emplace_back(std::move(poFeature));
2131 : }
2132 : }
// Nothing to return: out_array was zeroed above, so its release member is
// null.
2133 3590 : if (oFeatureQueue.empty())
2134 : {
2135 2125 : return 0;
2136 : }
2137 :
2138 1465 : out_array->release = OGRLayerDefaultReleaseArray;
2139 1465 : out_array->null_count = 0;
2140 :
2141 1465 : out_array->n_children = nMaxChildren;
2142 1465 : out_array->children = static_cast<struct ArrowArray **>(
2143 1465 : CPLCalloc(nMaxChildren, sizeof(struct ArrowArray *)));
// NOTE(review): release was already assigned a few lines above; this second
// assignment looks redundant.
2144 1465 : out_array->release = OGRLayerDefaultReleaseArray;
2145 1465 : out_array->n_buffers = 1;
2146 1465 : out_array->buffers =
2147 1465 : static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
2148 :
// Number of features that will be emitted: starts at the queue size and is
// lowered below whenever a column could only hold fewer features.
2149 1465 : size_t nFeatureCount = oFeatureQueue.size();
2150 1465 : const uint32_t nMemLimit = OGRArrowArrayHelper::GetMemLimit();
// Indices in out_array->children of the DateTime columns built by
// FillDateTimeArrayWithTimeZone(), whose own children also need their length
// to be set at the end.
2151 2930 : std::set<int> anArrayIndicesOfStructDateTime;
2152 1465 : if (bIncludeFID)
2153 : {
// FID column: int64 values, without validity bitmap.
2154 2662 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2155 1331 : CPLCalloc(1, sizeof(struct ArrowArray)));
2156 1331 : auto psChild = out_array->children[iSchemaChild];
2157 1331 : ++iSchemaChild;
2158 1331 : psChild->release = OGRLayerDefaultReleaseArray;
2159 1331 : psChild->n_buffers = 2;
2160 1331 : psChild->buffers =
2161 1331 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
2162 : int64_t *panValues =
2163 1331 : static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
2164 : sizeof(int64_t) * (oFeatureQueue.size() + 1)));
2165 1331 : if (panValues == nullptr)
2166 0 : goto error;
2167 1331 : psChild->buffers[1] = panValues;
2168 17926 : for (size_t iFeat = 0; iFeat < oFeatureQueue.size(); ++iFeat)
2169 : {
2170 16595 : panValues[iFeat] = oFeatureQueue[iFeat]->GetFID();
2171 : }
2172 : }
2173 :
// Attribute fields: one child array per non-ignored field, filled by the
// helper matching the field type and subtype.
2174 13676 : for (int i = 0; i < nFieldCount; ++i)
2175 : {
2176 12236 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
2177 12236 : if (poFieldDefn->IsIgnored())
2178 : {
2179 13 : continue;
2180 : }
2181 :
2182 24446 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2183 12223 : CPLCalloc(1, sizeof(struct ArrowArray)));
2184 12223 : auto psChild = out_array->children[iSchemaChild];
2185 12223 : psChild->release = OGRLayerDefaultReleaseArray;
2186 12223 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
2187 12223 : const auto eSubType = poFieldDefn->GetSubType();
2188 12223 : switch (poFieldDefn->GetType())
2189 : {
2190 3523 : case OFTInteger:
2191 : {
2192 3523 : if (eSubType == OFSTBoolean)
2193 : {
2194 138 : if (!FillBoolArray(psChild, oFeatureQueue, nFeatureCount,
2195 : bIsNullable, &OGRField::Integer, i))
2196 0 : goto error;
2197 : }
2198 3385 : else if (eSubType == OFSTInt16)
2199 : {
2200 478 : if (!FillArray<int16_t>(psChild, oFeatureQueue,
2201 : nFeatureCount, bIsNullable,
2202 : &OGRField::Integer, i))
2203 0 : goto error;
2204 : }
2205 : else
2206 : {
2207 2907 : if (!FillArray<int32_t>(psChild, oFeatureQueue,
2208 : nFeatureCount, bIsNullable,
2209 : &OGRField::Integer, i))
2210 0 : goto error;
2211 : }
2212 :
// Integer fields attached to a coded field domain additionally get a
// dictionary built from that domain.
2213 3523 : const auto &osDomainName = poFieldDefn->GetDomainName();
2214 3523 : if (!osDomainName.empty())
2215 : {
2216 13 : auto poDS = GetDataset();
2217 13 : if (poDS)
2218 : {
2219 : const auto poFieldDomain =
2220 13 : poDS->GetFieldDomain(osDomainName);
2221 26 : if (poFieldDomain &&
2222 13 : poFieldDomain->GetDomainType() == OFDT_CODED)
2223 : {
2224 13 : const OGRCodedFieldDomain *poCodedDomain =
2225 : static_cast<const OGRCodedFieldDomain *>(
2226 : poFieldDomain);
2227 13 : OGRArrowArrayHelper::FillDict(psChild,
2228 : poCodedDomain);
2229 : }
2230 : }
2231 : }
2232 :
2233 3523 : break;
2234 : }
2235 :
2236 333 : case OFTInteger64:
2237 : {
2238 333 : if (!FillArray<int64_t>(psChild, oFeatureQueue, nFeatureCount,
2239 : bIsNullable, &OGRField::Integer64, i))
2240 0 : goto error;
2241 333 : break;
2242 : }
2243 :
2244 2098 : case OFTReal:
2245 : {
2246 2098 : if (eSubType == OFSTFloat32)
2247 : {
2248 478 : if (!FillArray<float>(psChild, oFeatureQueue, nFeatureCount,
2249 : bIsNullable, &OGRField::Real, i))
2250 0 : goto error;
2251 : }
2252 : else
2253 : {
2254 1620 : if (!FillArray<double>(psChild, oFeatureQueue,
2255 : nFeatureCount, bIsNullable,
2256 : &OGRField::Real, i))
2257 0 : goto error;
2258 : }
2259 2098 : break;
2260 : }
2261 :
// For the variable-size types below, the helper returns the number of
// features it could process: 0 is treated as an error, and a value lower
// than nFeatureCount shortens the batch.
2262 3767 : case OFTString:
2263 : case OFTWideString:
2264 : {
2265 3767 : const size_t nThisFeatureCount = FillStringArray<int32_t>(
2266 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2267 : nMemLimit);
2268 3767 : if (nThisFeatureCount == 0)
2269 : {
2270 19 : goto error_max_mem;
2271 : }
2272 3748 : if (nThisFeatureCount < nFeatureCount)
2273 1 : nFeatureCount = nThisFeatureCount;
2274 3748 : break;
2275 : }
2276 :
2277 913 : case OFTBinary:
2278 : {
2279 913 : const int nWidth = poFieldDefn->GetWidth();
2280 913 : if (nWidth > 0)
2281 : {
// Fixed-width binary: cap the number of features so that
// nFeatureCount * nWidth does not exceed nMemLimit.
2282 8 : if (nFeatureCount > nMemLimit / nWidth)
2283 : {
2284 1 : nFeatureCount = nMemLimit / nWidth;
2285 1 : if (nFeatureCount == 0)
2286 0 : goto error_max_mem;
2287 : }
2288 8 : if (!FillFixedWidthBinaryArray(psChild, oFeatureQueue,
2289 : nFeatureCount, bIsNullable,
2290 : nWidth, i))
2291 0 : goto error;
2292 : }
2293 : else
2294 : {
2295 905 : const size_t nThisFeatureCount = FillBinaryArray<int32_t>(
2296 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2297 : nMemLimit);
2298 905 : if (nThisFeatureCount == 0)
2299 : {
2300 1 : goto error_max_mem;
2301 : }
2302 904 : if (nThisFeatureCount < nFeatureCount)
2303 1 : nFeatureCount = nThisFeatureCount;
2304 : }
2305 912 : break;
2306 : }
2307 :
2308 234 : case OFTIntegerList:
2309 : {
2310 : size_t nThisFeatureCount;
2311 234 : if (eSubType == OFSTBoolean)
2312 : {
2313 : nThisFeatureCount =
2314 49 : FillListArrayBool<int32_t, GetFromIntegerList>(
2315 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2316 : i, nMemLimit);
2317 : }
2318 185 : else if (eSubType == OFSTInt16)
2319 : {
2320 : nThisFeatureCount =
2321 28 : FillListArray<int32_t, int16_t, GetFromIntegerList>(
2322 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2323 : i, nMemLimit);
2324 : }
2325 : else
2326 : {
2327 : nThisFeatureCount =
2328 157 : FillListArray<int32_t, int32_t, GetFromIntegerList>(
2329 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2330 : i, nMemLimit);
2331 : }
2332 234 : if (nThisFeatureCount == 0)
2333 : {
2334 2 : goto error_max_mem;
2335 : }
2336 232 : if (nThisFeatureCount < nFeatureCount)
2337 2 : nFeatureCount = nThisFeatureCount;
2338 232 : break;
2339 : }
2340 :
2341 75 : case OFTInteger64List:
2342 : {
2343 : const size_t nThisFeatureCount =
2344 75 : FillListArray<int32_t, int64_t, GetFromInteger64List>(
2345 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2346 : nMemLimit);
2347 75 : if (nThisFeatureCount == 0)
2348 : {
2349 1 : goto error_max_mem;
2350 : }
2351 74 : if (nThisFeatureCount < nFeatureCount)
2352 1 : nFeatureCount = nThisFeatureCount;
2353 74 : break;
2354 : }
2355 :
2356 156 : case OFTRealList:
2357 : {
2358 : size_t nThisFeatureCount;
2359 156 : if (eSubType == OFSTFloat32)
2360 : {
2361 : nThisFeatureCount =
2362 41 : FillListArray<int32_t, float, GetFromRealList>(
2363 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2364 : i, nMemLimit);
2365 : }
2366 : else
2367 : {
2368 : nThisFeatureCount =
2369 115 : FillListArray<int32_t, double, GetFromRealList>(
2370 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2371 : i, nMemLimit);
2372 : }
2373 156 : if (nThisFeatureCount == 0)
2374 : {
2375 1 : goto error_max_mem;
2376 : }
2377 155 : if (nThisFeatureCount < nFeatureCount)
2378 1 : nFeatureCount = nThisFeatureCount;
2379 155 : break;
2380 : }
2381 :
2382 203 : case OFTStringList:
2383 : case OFTWideStringList:
2384 : {
2385 203 : const size_t nThisFeatureCount = FillStringListArray<int32_t>(
2386 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2387 : nMemLimit);
2388 203 : if (nThisFeatureCount == 0)
2389 : {
2390 1 : goto error_max_mem;
2391 : }
2392 202 : if (nThisFeatureCount < nFeatureCount)
2393 1 : nFeatureCount = nThisFeatureCount;
2394 202 : break;
2395 : }
2396 :
2397 125 : case OFTDate:
2398 : {
2399 125 : if (!FillDateArray(psChild, oFeatureQueue, nFeatureCount,
2400 : bIsNullable, i))
2401 0 : goto error;
2402 125 : break;
2403 : }
2404 :
2405 72 : case OFTTime:
2406 : {
2407 72 : if (!FillTimeArray(psChild, oFeatureQueue, nFeatureCount,
2408 : bIsNullable, i))
2409 0 : goto error;
2410 72 : break;
2411 : }
2412 :
2413 724 : case OFTDateTime:
2414 : {
// Three possible encodings: string, timestamp + offset pair (when the
// TIMEZONE option is "mixed", or when it is unset and the field is flagged
// OGR_TZFLAG_MIXED_TZ), or plain timestamp.
2415 724 : if (bDateTimeAsString)
2416 : {
2417 9 : const size_t nThisFeatureCount = FillDateTimeArrayAsString(
2418 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2419 : nMemLimit);
2420 9 : if (nThisFeatureCount == 0)
2421 : {
2422 0 : goto error_max_mem;
2423 : }
2424 9 : if (nThisFeatureCount < nFeatureCount)
2425 0 : nFeatureCount = nThisFeatureCount;
2426 : }
2427 : else
2428 : {
2429 : const char *pszTZOverride =
2430 715 : m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
2431 715 : const int nTZFlag = poFieldDefn->GetTZFlag();
2432 715 : if ((pszTZOverride && EQUAL(pszTZOverride, "mixed")) ||
2433 706 : (!pszTZOverride && nTZFlag == OGR_TZFLAG_MIXED_TZ))
2434 :
2435 : {
2436 3 : anArrayIndicesOfStructDateTime.insert(iSchemaChild);
2437 3 : if (!FillDateTimeArrayWithTimeZone(
2438 : psChild, oFeatureQueue, nFeatureCount,
2439 : bIsNullable, i, nTZFlag))
2440 : {
2441 0 : goto error;
2442 : }
2443 : }
2444 712 : else if (!FillDateTimeArray(psChild, oFeatureQueue,
2445 : nFeatureCount, bIsNullable, i,
2446 : nTZFlag))
2447 : {
2448 0 : goto error;
2449 : }
2450 : }
2451 724 : break;
2452 : }
2453 : }
2454 :
2455 12198 : ++iSchemaChild;
2456 : }
// Geometry fields: one child array per non-ignored geometry field, filled by
// FillWKBGeometryArray().
2457 2688 : for (int i = 0; i < nGeomFieldCount; ++i)
2458 : {
2459 1248 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
2460 1248 : if (poFieldDefn->IsIgnored())
2461 : {
2462 3 : continue;
2463 : }
2464 :
2465 2490 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2466 1245 : CPLCalloc(1, sizeof(struct ArrowArray)));
2467 1245 : auto psChild = out_array->children[iSchemaChild];
2468 1245 : ++iSchemaChild;
2469 1245 : psChild->release = OGRLayerDefaultReleaseArray;
// Provisional length: it is overwritten with nFeatureCount at the end of the
// function.
2470 1245 : psChild->length = oFeatureQueue.size();
2471 1245 : const size_t nThisFeatureCount = FillWKBGeometryArray<int32_t>(
2472 : psChild, oFeatureQueue, nFeatureCount, poFieldDefn, i, nMemLimit);
2473 1245 : if (nThisFeatureCount == 0)
2474 : {
2475 0 : goto error_max_mem;
2476 : }
2477 1245 : if (nThisFeatureCount < nFeatureCount)
2478 1 : nFeatureCount = nThisFeatureCount;
2479 : }
2480 :
2481 : // Remove consumed features from the queue
2482 1440 : if (nFeatureCount == oFeatureQueue.size())
2483 1431 : oFeatureQueue.clear();
2484 : else
2485 : {
2486 27 : for (size_t i = 0; i < nFeatureCount; ++i)
2487 : {
2488 18 : oFeatureQueue.pop_front();
2489 : }
2490 : }
2491 :
// Ignored fields leave unused trailing slots in children[]: only advertise
// the children actually created.
2492 1440 : out_array->n_children = iSchemaChild;
2493 1440 : out_array->length = nFeatureCount;
// NOTE(review): when nFeatureCount was lowered by a column processed after
// others, the columns filled earlier were built for a larger number of
// features. Only their length is adjusted here; their null_count is not
// recomputed. Confirm whether consumers rely on null_count being exact.
2494 16110 : for (int i = 0; i < out_array->n_children; ++i)
2495 : {
2496 14670 : out_array->children[i]->length = nFeatureCount;
2497 14670 : if (cpl::contains(anArrayIndicesOfStructDateTime, i))
2498 : {
2499 9 : for (int j = 0; j < out_array->children[i]->n_children; ++j)
2500 : {
2501 6 : out_array->children[i]->children[j]->length = nFeatureCount;
2502 : }
2503 : }
2504 : }
2505 :
2506 1440 : return 0;
2507 :
// error_max_mem falls through to error after emitting its specific message.
2508 25 : error_max_mem:
2509 25 : CPLError(CE_Failure, CPLE_AppDefined,
2510 : "Too large feature: not even a single feature can be returned");
// Common error path: drop the queued features, mark the stream as finished,
// release what was built so far and hand back a zeroed array.
2511 25 : error:
2512 25 : oFeatureQueue.clear();
2513 25 : poPrivate->poShared->m_bEOF = true;
2514 25 : out_array->release(out_array);
2515 25 : memset(out_array, 0, sizeof(*out_array));
2516 25 : return ENOMEM;
2517 : }
2518 :
2519 : /************************************************************************/
2520 : /* StaticGetNextArrowArray() */
2521 : /************************************************************************/
2522 :
2523 : /** Default implementation of the ArrowArrayStream::get_next() callback.
2524 : *
2525 : * To be used by driver implementations that have a custom GetArrowStream()
2526 : * implementation.
2527 : *
 * This static function retrieves the layer from the shared private data
 * attached to the stream and forwards the call to its GetNextArrowArray()
 * method. If the layer pointer stored there is null, an error is emitted and
 * EINVAL is returned without touching out_array.
 *
 * @param stream Stream whose private_data is a
 *               ArrowArrayStreamPrivateDataSharedDataWrapper.
 * @param out_array Array passed through to GetNextArrowArray().
 * @return the value returned by GetNextArrowArray(), or EINVAL when the layer
 *         pointer is null.
 *
2528 : * @since GDAL 3.6
2529 : */
2530 4586 : int OGRLayer::StaticGetNextArrowArray(struct ArrowArrayStream *stream,
2531 : struct ArrowArray *out_array)
2532 : {
2533 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2534 4586 : stream->private_data)
2535 4586 : ->poShared->m_poLayer;
2536 4586 : if (poLayer == nullptr)
2537 : {
2538 1 : CPLError(CE_Failure, CPLE_NotSupported,
2539 : "Calling get_next() on a freed OGRLayer is not supported");
2540 1 : return EINVAL;
2541 : }
2542 4585 : return poLayer->GetNextArrowArray(stream, out_array);
2543 : }
2544 :
2545 : /************************************************************************/
2546 : /* ReleaseStream() */
2547 : /************************************************************************/
2548 :
2549 : /** Release an ArrowArrayStream.
2550 : *
2551 : * To be used by driver implementations that have a custom GetArrowStream()
2552 : * implementation.
2553 : *
 * The shared state is reset (no stream in progress, end-of-iteration flag
 * cleared), ResetReading() is called on the layer if the shared state still
 * points to one, and the private data of the stream is deleted. On return,
 * the private_data and release members of the stream are null.
 *
2554 : * @param stream Arrow array stream to release.
2555 : * @since GDAL 3.6
2556 : */
2557 2257 : void OGRLayer::ReleaseStream(struct ArrowArrayStream *stream)
2558 : {
// Only streams whose release callback is this very function are expected.
2559 2257 : assert(stream->release == OGRLayer::ReleaseStream);
2560 2257 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
2561 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2562 : stream->private_data);
2563 2257 : poPrivate->poShared->m_bArrowArrayStreamInProgress = false;
2564 2257 : poPrivate->poShared->m_bEOF = false;
// m_poLayer may be null here, hence the check before rewinding the layer.
2565 2257 : if (poPrivate->poShared->m_poLayer)
2566 2207 : poPrivate->poShared->m_poLayer->ResetReading();
2567 2257 : delete poPrivate;
2568 2257 : stream->private_data = nullptr;
2569 2257 : stream->release = nullptr;
2570 2257 : }
2571 :
2572 : /************************************************************************/
2573 : /* GetLastErrorArrowArrayStream() */
2574 : /************************************************************************/
2575 :
2576 : /** Default implementation of the ArrowArrayStream::get_last_error() callback.
2577 : *
2578 : * To be used by driver implementations that have a custom GetArrowStream()
2579 : * implementation.
2580 : *
 * The stream argument is not used: the message returned is the one of
 * CPLGetLastErrorMsg().
 *
 * @return the message returned by CPLGetLastErrorMsg(), or nullptr when that
 *         message is empty.
 *
2581 : * @since GDAL 3.6
2582 : */
2583 3 : const char *OGRLayer::GetLastErrorArrowArrayStream(struct ArrowArrayStream *)
2584 : {
2585 3 : const char *pszLastErrorMsg = CPLGetLastErrorMsg();
2586 3 : return pszLastErrorMsg[0] != '\0' ? pszLastErrorMsg : nullptr;
2587 : }
2588 :
2589 : /************************************************************************/
2590 : /* GetArrowStream() */
2591 : /************************************************************************/
2592 :
2593 : /** Get an Arrow C stream.
2594 : *
2595 : * On successful return, and when the stream interface is no longer needed, it
2596 : * must be freed with out_stream->release(out_stream). Please carefully
2597 : * read https://arrow.apache.org/docs/format/CStreamInterface.html for more
2598 : * details on using Arrow C stream.
2599 : *
2600 : * The method may take into account ignored fields set with SetIgnoredFields()
2601 : * (the default implementation does), and should take into account filters set
2602 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2603 : * specialized implementations may fallback to the default (slower)
2604 : * implementation when filters are set.
2605 : * Drivers that have a specialized implementation should advertise the
2606 : * OLCFastGetArrowStream capability.
2607 : *
2608 : * There are extra precautions to take into account in a OGR context. Unless
2609 : * otherwise specified by a particular driver implementation, the get_schema(),
2610 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2611 : * structure should no longer be used after the OGRLayer, from which the
2612 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2613 : * dataset closing). The reason is that those function pointers will typically
2614 : * point to methods of the OGRLayer instance.
2615 : * However, the ArrowSchema and ArrowArray structures filled from those
2616 : * callbacks can be used and must be released independently from the
2617 : * ArrowArrayStream or the layer.
2618 : *
2619 : * Furthermore, unless otherwise specified by a particular driver
2620 : * implementation, only one ArrowArrayStream can be active at a time on
2621 : * a given layer (that is the last active one must be explicitly released before
2622 : * a next one is asked). Changing filter state, ignored columns, modifying the
2623 : * schema or using ResetReading()/GetNextFeature() while using a
2624 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2625 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2626 : * should be called on a layer, while an ArrowArrayStream on it is active.
2627 : *
2628 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2629 : * get_schema() callback may be set with the potential following items:
2630 : * <ul>
2631 : * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2632 : * Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2633 : * specified.</li>
2634 : * <li>"GDAL:OGR:alternative_name": value of
2635 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2636 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2637 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2638 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2639 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2640 : * string)</li>
2641 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2642 : * "true" or "false")</li>
2643 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2644 : * </ul>
2645 : *
2646 : * A potential usage can be:
2647 : \code{.cpp}
2648 : struct ArrowArrayStream stream;
2649 : if( !poLayer->GetArrowStream(&stream, nullptr))
2650 : {
2651 : CPLError(CE_Failure, CPLE_AppDefined, "GetArrowStream() failed\n");
2652 : exit(1);
2653 : }
2654 : struct ArrowSchema schema;
2655 : if( stream.get_schema(&stream, &schema) == 0 )
2656 : {
2657 : // Do something useful
2658 : schema.release(&schema);
2659 : }
2660 : while( true )
2661 : {
2662 : struct ArrowArray array;
2663 : // Look for an error (get_next() returning a non-zero code), or
2664 : // end of iteration (array.release == nullptr)
2665 : if( stream.get_next(&stream, &array) != 0 ||
2666 : array.release == nullptr )
2667 : {
2668 : break;
2669 : }
2670 : // Do something useful
2671 : array.release(&array);
2672 : }
2673 : stream.release(&stream);
2674 : \endcode
2675 : *
2676 : * A full example is available in the
2677 : * <a
2678 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2679 : From OGR using the Arrow C Stream data interface</a> tutorial.
2680 : *
2681 : * Options may be driver specific. The default implementation recognizes the
2682 : * following options:
2683 : * <ul>
2684 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to YES.
2685 : * </li>
2686 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2687 : * a ArrowArray batch. Defaults to 65 536.</li>
2688 : * <li>TIMEZONE="unknown", "mixed", "UTC", "(+|-)HH:MM" or any other value supported by
2689 : * Arrow. (GDAL >= 3.8)
2690 : * Override the timezone flag nominally provided by
2691 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2692 : * declaration, with a user specified timezone.
2693 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2694 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2695 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2696 : * to UTC of a OGRField::Date is only done if both the timezone indicated by
2697 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2698 : * this TIMEZONE option) are not unknown.
2699 : * Since GDAL 3.13, "mixed" can be used to create an Arrow structure field,
2700 : * following the "timestamp with offset" extension (https://github.com/apache/arrow/blob/main/docs/source/format/CanonicalExtensions.rst#timestamp-with-offset)
2701 : * and storing both a UTC timestamp and the offset in minutes from the UTC
2702 : * timezone.
2703 : * </li>
2704 : * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2705 : * Whether DateTime fields should be returned as a (normally ISO-8601
2706 : * formatted) string by drivers. The aim is to be able to handle mixed
2707 : * timezones (or timezone naive values) in the same column.
2708 : * All drivers must honour that option, and potentially fallback to the
2709 : * OGRLayer generic implementation if they cannot (which is the case for the
2710 : * Arrow, Parquet and ADBC drivers).
2711 : * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2712 : * </li>
2713 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2714 : * The default is OGC, which will lead to setting
2715 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2716 : * If setting GEOMETRY_METADATA_ENCODING to GEOARROW,
2717 : * ARROW:extension:name=geoarrow.wkb and
2718 : * ARROW:extension:metadata={"crs": <projjson CRS representation>} are set.
2719 : * </li>
2720 : * </ul>
2721 : *
2722 : * The Arrow/Parquet drivers recognize the following option:
2723 : * <ul>
2724 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2725 : * when the native geometry encoding is not WKB. Otherwise the geometry
2726 : * will be returned with its native Arrow encoding
2727 : * (possibly using GeoArrow encoding).</li>
2728 : * </ul>
2729 : *
2730 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2731 : * structure does not need to be initialized.
2732 : * @param papszOptions NULL terminated list of key=value options.
2733 : * @return true in case of success.
2734 : * @since GDAL 3.6
2735 : */
2736 2261 : bool OGRLayer::GetArrowStream(struct ArrowArrayStream *out_stream,
2737 : CSLConstList papszOptions)
2738 : {
2739 2261 : memset(out_stream, 0, sizeof(*out_stream));
2740 3820 : if (m_poSharedArrowArrayStreamPrivateData &&
2741 3820 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress)
2742 : {
2743 4 : CPLError(CE_Failure, CPLE_AppDefined,
2744 : "An arrow Arrow Stream is in progress on that layer. Only "
2745 : "one at a time is allowed in this implementation.");
2746 4 : return false;
2747 : }
2748 2257 : m_aosArrowArrayStreamOptions.Assign(CSLDuplicate(papszOptions), true);
2749 :
2750 2257 : out_stream->get_schema = OGRLayer::StaticGetArrowSchema;
2751 2257 : out_stream->get_next = OGRLayer::StaticGetNextArrowArray;
2752 2257 : out_stream->get_last_error = OGRLayer::GetLastErrorArrowArrayStream;
2753 2257 : out_stream->release = OGRLayer::ReleaseStream;
2754 :
2755 2257 : if (m_poSharedArrowArrayStreamPrivateData == nullptr)
2756 : {
2757 : m_poSharedArrowArrayStreamPrivateData =
2758 702 : std::make_shared<ArrowArrayStreamPrivateData>();
2759 702 : m_poSharedArrowArrayStreamPrivateData->m_poLayer = this;
2760 : }
2761 2257 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress = true;
2762 :
2763 : // Special case for "FID = constant", or "FID IN (constant1, ...., constantN)"
2764 2257 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.clear();
2765 2257 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS = 0;
2766 2257 : if (m_poAttrQuery)
2767 : {
2768 : swq_expr_node *poNode =
2769 1066 : static_cast<swq_expr_node *>(m_poAttrQuery->GetSWQExpr());
2770 3198 : if (poNode->eNodeType == SNT_OPERATION &&
2771 1066 : (poNode->nOperation == SWQ_IN || poNode->nOperation == SWQ_EQ) &&
2772 834 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
2773 289 : poNode->papoSubExpr[0]->field_index ==
2774 2141 : GetLayerDefn()->GetFieldCount() + SPF_FID &&
2775 9 : TestCapability(OLCRandomRead))
2776 : {
2777 8 : std::set<GIntBig> oSetAlreadyListed;
2778 13 : for (int i = 1; i < poNode->nSubExprCount; ++i)
2779 : {
2780 27 : if (poNode->papoSubExpr[i]->eNodeType == SNT_CONSTANT &&
2781 18 : poNode->papoSubExpr[i]->field_type == SWQ_INTEGER64 &&
2782 9 : oSetAlreadyListed.find(poNode->papoSubExpr[i]->int_value) ==
2783 18 : oSetAlreadyListed.end())
2784 : {
2785 8 : oSetAlreadyListed.insert(poNode->papoSubExpr[i]->int_value);
2786 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
2787 8 : .push_back(poNode->papoSubExpr[i]->int_value);
2788 : }
2789 : }
2790 : }
2791 : }
2792 :
2793 2257 : auto poPrivateData = new ArrowArrayStreamPrivateDataSharedDataWrapper();
2794 2257 : poPrivateData->poShared = m_poSharedArrowArrayStreamPrivateData;
2795 2257 : out_stream->private_data = poPrivateData;
2796 2257 : return true;
2797 : }
2798 :
2799 : /************************************************************************/
2800 : /* OGR_L_GetArrowStream() */
2801 : /************************************************************************/
2802 :
2803 : /** Get an Arrow C stream.
2804 : *
2805 : * On successful return, and when the stream interface is no longer needed, it
2806 : * must be freed with out_stream->release(out_stream). Please carefully read
2807 : * https://arrow.apache.org/docs/format/CStreamInterface.html for more details
2808 : * on using Arrow C stream.
2809 : *
2810 : * The method may take into account ignored fields set with SetIgnoredFields()
2811 : * (the default implementation does), and should take into account filters set
2812 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2813 : * specialized implementations may fallback to the default (slower)
2814 : * implementation when filters are set.
2815 : * Drivers that have a specialized implementation should
2816 : * advertise the OLCFastGetArrowStream capability.
2817 : *
2818 : * There are extra precautions to take into account in a OGR context. Unless
2819 : * otherwise specified by a particular driver implementation, the get_schema(),
2820 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2821 : * structure should no longer be used after the OGRLayer, from which the
2822 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2823 : * dataset closing). The reason is that those function pointers will typically
2824 : * point to methods of the OGRLayer instance.
2825 : * However, the ArrowSchema and ArrowArray structures filled from those
2826 : * callbacks can be used and must be released independently from the
2827 : * ArrowArrayStream or the layer.
2828 : *
2829 : * Furthermore, unless otherwise specified by a particular driver
2830 : * implementation, only one ArrowArrayStream can be active at a time on
2831 : * a given layer (that is the last active one must be explicitly released before
2832 : * a next one is asked). Changing filter state, ignored columns, modifying the
2833 : * schema or using ResetReading()/GetNextFeature() while using a
2834 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2835 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2836 : * should be called on a layer, while an ArrowArrayStream on it is active.
2837 : *
2838 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2839 : * get_schema() callback may be set with the potential following items:
2840 : * <ul>
2841 : * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2842 : * Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2843 : * specified.</li>
2844 : * <li>"GDAL:OGR:alternative_name": value of
2845 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2846 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2847 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2848 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2849 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2850 : * string)</li>
2851 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2852 : * "true" or "false")</li>
2853 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2854 : * </ul>
2855 : *
2856 : * A potential usage can be:
2857 : \code{.cpp}
2858 : struct ArrowArrayStream stream;
2859 : if( !OGR_L_GetArrowStream(hLayer, &stream, nullptr))
2860 : {
2861 : CPLError(CE_Failure, CPLE_AppDefined,
2862 : "OGR_L_GetArrowStream() failed\n");
2863 : exit(1);
2864 : }
2865 : struct ArrowSchema schema;
2866 : if( stream.get_schema(&stream, &schema) == 0 )
2867 : {
2868 : // Do something useful
2869 : schema.release(&schema);
2870 : }
2871 : while( true )
2872 : {
2873 : struct ArrowArray array;
2874 : // Look for an error (get_next() returning a non-zero code), or
2875 : // end of iteration (array.release == nullptr)
2876 : if( stream.get_next(&stream, &array) != 0 ||
2877 : array.release == nullptr )
2878 : {
2879 : break;
2880 : }
2881 : // Do something useful
2882 : array.release(&array);
2883 : }
2884 : stream.release(&stream);
2885 : \endcode
2886 : *
2887 : * A full example is available in the
2888 : * <a
2889 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2890 : From OGR using the Arrow C Stream data interface</a> tutorial.
2891 : *
2892 : * Options may be driver specific. The default implementation recognizes the
2893 : * following options:
2894 : * <ul>
2895 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to
2896 : YES.</li>
2897 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2898 : * an ArrowArray batch. Defaults to 65 536.</li>
2899 : * <li>TIMEZONE="unknown", "mixed", "UTC", "(+|-)HH:MM" or any other value supported by
2900 : * Arrow. (GDAL >= 3.8)
2901 : * Override the timezone flag nominally provided by
2902 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2903 : * declaration, with a user specified timezone.
2904 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2905 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2906 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2907 : * to UTC of a OGRField::Date is only done if both the timezone indicated by
2908 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2909 : * this TIMEZONE option) are not unknown.
2910 : * Since GDAL 3.13, "mixed" can be used to create an Arrow structure field,
2911 : * following the "timestamp with offset" extension (https://github.com/apache/arrow/blob/main/docs/source/format/CanonicalExtensions.rst#timestamp-with-offset)
2912 : * and storing both a UTC timestamp and the offset in minutes from the UTC
2913 : * timezone.
2914 : * </li>
2915 : * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2916 : * Whether DateTime fields should be returned as a (normally ISO-8601
2917 : * formatted) string by drivers. The aim is to be able to handle mixed
2918 : * timezones (or timezone naive values) in the same column.
2919 : * All drivers must honour that option, and potentially fallback to the
2920 : * OGRLayer generic implementation if they cannot (which is the case for the
2921 : * Arrow, Parquet and ADBC drivers).
2922 : * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2923 : * </li>
2924 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2925 : * The default is OGC, which will lead to setting
2926 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2927 : * If setting GEOMETRY_METADATA_ENCODING to GEOARROW,
2928 : * ARROW:extension:name=geoarrow.wkb and
2929 : * ARROW:extension:metadata={"crs": <projjson CRS representation>} are set.
2930 : * </li>
2931 : * </ul>
2932 : *
2933 : * The Arrow/Parquet drivers recognize the following option:
2934 : * <ul>
2935 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2936 : * when the native geometry encoding is not WKB. Otherwise the geometry
2937 : * will be returned with its native Arrow encoding
2938 : * (possibly using GeoArrow encoding).</li>
2939 : * </ul>
2940 : *
2941 : * @param hLayer Layer
2942 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2943 : * structure does not need to be initialized.
2944 : * @param papszOptions NULL terminated list of key=value options.
2945 : * @return true in case of success.
2946 : * @since GDAL 3.6
2947 : */
2948 375 : bool OGR_L_GetArrowStream(OGRLayerH hLayer, struct ArrowArrayStream *out_stream,
2949 : char **papszOptions)
2950 : {
2951 375 : VALIDATE_POINTER1(hLayer, "OGR_L_GetArrowStream", false);
2952 375 : VALIDATE_POINTER1(out_stream, "OGR_L_GetArrowStream", false);
2953 :
2954 750 : return OGRLayer::FromHandle(hLayer)->GetArrowStream(out_stream,
2955 375 : papszOptions);
2956 : }
2957 :
2958 : /************************************************************************/
2959 : /* OGRParseArrowMetadata() */
2960 : /************************************************************************/
2961 :
/** Decode the binary key/value metadata blob of an ArrowSchema.
 *
 * The blob is read as a native-endian int32 number of pairs, followed for
 * each pair by an int32 key length, the key bytes, an int32 value length and
 * the value bytes (no nul terminators).
 *
 * @param pabyMetadata Metadata blob, typically ArrowSchema::metadata. May be
 *                     NULL, in which case an empty map is returned.
 * @return the decoded key/value pairs. When a key appears several times, the
 *         last occurrence wins. Decoding stops at the first negative length,
 *         and the pairs decoded so far are returned.
 */
std::map<std::string, std::string>
OGRParseArrowMetadata(const char *pabyMetadata)
{
    std::map<std::string, std::string> oMetadata;

    // Metadata is optional in the Arrow C data interface: NULL means "none".
    if (pabyMetadata == nullptr)
        return oMetadata;

    // Read one int32 at the cursor and advance. memcpy() is used since the
    // blob carries no alignment guarantee.
    const auto ReadInt32 = [&pabyMetadata]()
    {
        int32_t nVal = 0;
        memcpy(&nVal, pabyMetadata, sizeof(int32_t));
        pabyMetadata += sizeof(int32_t);
        return nVal;
    };

    const int32_t nKVP = ReadInt32();
    for (int32_t i = 0; i < nKVP; ++i)
    {
        const int32_t nSizeKey = ReadInt32();
        if (nSizeKey < 0)
        {
            // Corrupted blob: a negative length would be converted to a huge
            // size_t by std::string.
            break;
        }
        std::string osKey(pabyMetadata, static_cast<size_t>(nSizeKey));
        pabyMetadata += nSizeKey;

        const int32_t nSizeValue = ReadInt32();
        if (nSizeValue < 0)
            break;
        std::string osValue(pabyMetadata, static_cast<size_t>(nSizeValue));
        pabyMetadata += nSizeValue;

        oMetadata[std::move(osKey)] = std::move(osValue);
    }

    return oMetadata;
}
2990 :
2991 : /************************************************************************/
2992 : /* ParseDecimalFormat() */
2993 : /************************************************************************/
2994 :
/** Parse an Arrow decimal format string.
 *
 * Recognized layouts:
 *   - "d:19,10"      ==> decimal128 [precision 19, scale 10]
 *   - "d:19,10,NNN"  ==> decimal of bit width NNN [precision 19, scale 10]
 *
 * The first two characters are skipped without being inspected, so the
 * caller must have established that format starts with the "d:" prefix.
 *
 * @param format        Arrow format string.
 * @param nPrecision    Output precision (0 when no comma is found).
 * @param nScale        Output scale (0 when no comma is found).
 * @param nWidthInBytes Output storage width in bytes. Defaults to 16
 *                      (128 bits) when no width is given. Set to 0 on failure.
 * @return true on success. false if the precision/scale separator is
 *         missing, or if the bit width is not a strictly positive multiple
 *         of 8.
 */
static bool ParseDecimalFormat(const char *format, int &nPrecision, int &nScale,
                               int &nWidthInBytes)
{
    nPrecision = 0;
    nScale = 0;
    nWidthInBytes = 128 / 8;  // 128 bit

    const char *pszFirstComma = strchr(format + 2, ',');
    if (!pszFirstComma)
    {
        // shouldn't happen for well-formed schemas
        nWidthInBytes = 0;
        return false;
    }

    nPrecision = atoi(format + 2);
    nScale = atoi(pszFirstComma + 1);

    const char *pszSecondComma = strchr(pszFirstComma + 1, ',');
    if (pszSecondComma)
    {
        const int nWidthInBits = atoi(pszSecondComma + 1);
        // Reject zero and negative widths too: (-8 % 8) == 0 would otherwise
        // be reported as a successful parse with a width of -1 byte.
        if (nWidthInBits <= 0 || (nWidthInBits % 8) != 0)
        {
            // shouldn't happen for well-formed schemas
            nWidthInBytes = 0;
            return false;
        }
        nWidthInBytes = nWidthInBits / 8;
    }
    return true;
}
3032 :
3033 : /************************************************************************/
3034 : /* GetErrorIfUnsupportedDecimal() */
3035 : /************************************************************************/
3036 :
/** Tell whether a decimal field with the given layout can be processed.
 *
 * @param nWidthInBytes Storage width of the decimal, in bytes.
 * @param nPrecision    Declared precision of the decimal.
 * @return nullptr when the layout is supported, otherwise a static string
 *         describing the limitation. The width is checked before the
 *         precision.
 */
static const char *GetErrorIfUnsupportedDecimal(int nWidthInBytes,
                                                int nPrecision)
{
    constexpr int DECIMAL128_WIDTH_IN_BYTES = 128 / 8;
    constexpr int DECIMAL256_WIDTH_IN_BYTES = 256 / 8;
    // precision=19 fits on 64 bits
    constexpr int MAX_SUPPORTED_PRECISION = 19;

    const bool bWidthSupported = nWidthInBytes == DECIMAL128_WIDTH_IN_BYTES ||
                                 nWidthInBytes == DECIMAL256_WIDTH_IN_BYTES;
    if (!bWidthSupported)
        return "For decimal field, only width 128 and 256 are supported";

    const bool bPrecisionSupported =
        nPrecision >= 1 && nPrecision <= MAX_SUPPORTED_PRECISION;
    return bPrecisionSupported
               ? nullptr
               : "For decimal field, only precision up to 19 is supported";
}
3054 :
3055 : /************************************************************************/
3056 : /* IsArrowTimeStampWithOffsetField() */
3057 : /************************************************************************/
3058 :
3059 1260 : static bool IsArrowTimeStampWithOffsetField(const struct ArrowSchema *schema)
3060 : {
3061 : bool ret =
3062 3377 : IsStructure(schema->format) && schema->n_children == 2 &&
3063 857 : IsTimestamp(schema->children[0]->format) &&
3064 0 : IsInt16(schema->children[1]->format) &&
3065 2520 : strcmp(schema->children[0]->name, ATSWO_TIMESTAMP_FIELD_NAME) == 0 &&
3066 0 : strcmp(schema->children[1]->name, ATSWO_OFFSET_MINUTES_FIELD_NAME) == 0;
3067 1260 : if (ret)
3068 : {
3069 0 : const auto oMetadata = OGRParseArrowMetadata(schema->metadata);
3070 0 : const auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
3071 0 : ret = oIter != oMetadata.end() &&
3072 0 : oIter->second == EXTENSION_NAME_ARROW_TIMESTAMP_WITH_OFFSET;
3073 : }
3074 1260 : return ret;
3075 : }
3076 :
3077 : /************************************************************************/
3078 : /* IsHandledSchema() */
3079 : /************************************************************************/
3080 :
3081 15760 : static bool IsHandledSchema(bool bTopLevel, const struct ArrowSchema *schema,
3082 : const std::string &osPrefix, bool bHasAttrQuery,
3083 : const CPLStringList &aosUsedFields)
3084 : {
3085 15760 : const char *format = schema->format;
3086 15760 : if (IsStructure(format))
3087 : {
3088 1246 : if (IsArrowTimeStampWithOffsetField(schema) &&
3089 1246 : aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3090 : {
3091 0 : return false;
3092 : }
3093 :
3094 12285 : for (int64_t i = 0; i < schema->n_children; ++i)
3095 : {
3096 44156 : if (!IsHandledSchema(/* bTopLevel = */ false,
3097 11039 : schema->children[static_cast<size_t>(i)],
3098 24772 : bTopLevel ? std::string()
3099 13733 : : osPrefix + schema->name + ".",
3100 : bHasAttrQuery, aosUsedFields))
3101 : {
3102 0 : return false;
3103 : }
3104 : }
3105 1246 : return true;
3106 : }
3107 :
3108 : // Lists or maps
3109 25169 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format) ||
3110 10655 : IsMap(format))
3111 : {
3112 4566 : if (!IsHandledSchema(/* bTopLevel = */ false, schema->children[0],
3113 : osPrefix, bHasAttrQuery, aosUsedFields))
3114 : {
3115 0 : return false;
3116 : }
3117 : // For now, we can't filter on lists or maps
3118 4566 : if (aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3119 : {
3120 0 : CPLDebug("OGR",
3121 : "Field %s has unhandled format '%s' for an "
3122 : "attribute to filter on",
3123 0 : (osPrefix + schema->name).c_str(), format);
3124 0 : return false;
3125 : }
3126 4566 : return true;
3127 : }
3128 :
3129 9948 : const char *const apszHandledFormats[] = {
3130 : "b", // boolean
3131 : "c", // int8
3132 : "C", // uint8
3133 : "s", // int16
3134 : "S", // uint16
3135 : "i", // int32
3136 : "I", // uint32
3137 : "l", // int64
3138 : "L", // uint64
3139 : "e", // float16
3140 : "f", // float32
3141 : "g", // float64,
3142 : "z", // binary
3143 : "Z", // large binary
3144 : "u", // UTF-8 string
3145 : "U", // large UTF-8 string
3146 : "tdD", // date32[days]
3147 : "tdm", // date64[milliseconds]
3148 : "tts", //time32 [seconds]
3149 : "ttm", //time32 [milliseconds]
3150 : "ttu", //time64 [microseconds]
3151 : "ttn", //time64 [nanoseconds]
3152 : };
3153 :
3154 115231 : for (const char *pszHandledFormat : apszHandledFormats)
3155 : {
3156 113923 : if (strcmp(format, pszHandledFormat) == 0)
3157 : {
3158 8640 : return true;
3159 : }
3160 : }
3161 :
3162 1308 : if (IsDecimal(format))
3163 : {
3164 790 : if (bHasAttrQuery &&
3165 790 : aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3166 : {
3167 2 : int nPrecision = 0;
3168 2 : int nScale = 0;
3169 2 : int nWidthInBytes = 0;
3170 2 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
3171 : {
3172 0 : CPLDebug("OGR", "%s",
3173 0 : (std::string("Invalid field format ") + format +
3174 0 : " for field " + osPrefix + schema->name)
3175 : .c_str());
3176 0 : return false;
3177 : }
3178 :
3179 : const char *pszError =
3180 2 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
3181 2 : if (pszError)
3182 : {
3183 0 : CPLDebug("OGR", "%s", pszError);
3184 0 : return false;
3185 : }
3186 : }
3187 412 : return true;
3188 : }
3189 :
3190 896 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
3191 : {
3192 896 : return true;
3193 : }
3194 :
3195 0 : CPLDebug("OGR", "Field %s has unhandled format '%s'",
3196 0 : (osPrefix + schema->name).c_str(), format);
3197 0 : return false;
3198 : }
3199 :
3200 : /************************************************************************/
3201 : /* OGRLayer::CanPostFilterArrowArray() */
3202 : /************************************************************************/
3203 :
3204 : /** Whether the PostFilterArrowArray() can work on the schema to remove
3205 : * rows that aren't selected by the spatial or attribute filter.
3206 : */
3207 155 : bool OGRLayer::CanPostFilterArrowArray(const struct ArrowSchema *schema) const
3208 : {
3209 155 : if (!IsHandledSchema(
3210 155 : /* bTopLevel=*/true, schema, std::string(),
3211 155 : m_poAttrQuery != nullptr,
3212 310 : m_poAttrQuery ? CPLStringList(m_poAttrQuery->GetUsedFields())
3213 : : CPLStringList()))
3214 : {
3215 0 : return false;
3216 : }
3217 :
3218 155 : if (m_poFilterGeom)
3219 : {
3220 22 : bool bFound = false;
3221 : const char *pszGeomFieldName =
3222 : const_cast<OGRLayer *>(this)
3223 22 : ->GetLayerDefn()
3224 22 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
3225 22 : ->GetNameRef();
3226 839 : for (int64_t i = 0; i < schema->n_children; ++i)
3227 : {
3228 839 : const auto fieldSchema = schema->children[i];
3229 839 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
3230 : {
3231 23 : if (!IsBinary(fieldSchema->format) &&
3232 1 : !IsLargeBinary(fieldSchema->format))
3233 : {
3234 1 : CPLDebug("OGR", "Geometry field %s has handled format '%s'",
3235 : fieldSchema->name, fieldSchema->format);
3236 1 : return false;
3237 : }
3238 :
3239 : // Check if ARROW:extension:name = ogc.wkb
3240 21 : const char *pabyMetadata = fieldSchema->metadata;
3241 21 : if (!pabyMetadata)
3242 : {
3243 0 : CPLDebug(
3244 : "OGR",
3245 : "Geometry field %s lacks metadata in its schema field",
3246 : fieldSchema->name);
3247 0 : return false;
3248 : }
3249 :
3250 21 : const auto oMetadata = OGRParseArrowMetadata(pabyMetadata);
3251 21 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
3252 21 : if (oIter == oMetadata.end())
3253 : {
3254 0 : CPLDebug("OGR",
3255 : "Geometry field %s lacks "
3256 : "%s metadata "
3257 : "in its schema field",
3258 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY);
3259 0 : return false;
3260 : }
3261 21 : if (oIter->second != EXTENSION_NAME_OGC_WKB &&
3262 0 : oIter->second != EXTENSION_NAME_GEOARROW_WKB)
3263 : {
3264 0 : CPLDebug("OGR",
3265 : "Geometry field %s has unexpected "
3266 : "%s = '%s' metadata "
3267 : "in its schema field",
3268 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY,
3269 0 : oIter->second.c_str());
3270 0 : return false;
3271 : }
3272 :
3273 21 : bFound = true;
3274 21 : break;
3275 : }
3276 : }
3277 21 : if (!bFound)
3278 : {
3279 0 : CPLDebug("OGR", "Cannot find geometry field %s in schema",
3280 : pszGeomFieldName);
3281 0 : return false;
3282 : }
3283 : }
3284 :
3285 154 : return true;
3286 : }
3287 :
#if 0
/************************************************************************/
/*                       CheckValidityBuffer()                          */
/************************************************************************/

/* Debugging helper (compiled out): asserts that array->null_count, when it
 * is known (>= 0), matches the number of unset bits of the validity bitmap
 * over [offset, offset + length).
 */
static void CheckValidityBuffer(const struct ArrowArray *array)
{
    // A negative null_count means "not computed": nothing to verify.
    if (array->null_count < 0)
        return;

    const uint8_t *pabyValidity =
        static_cast<const uint8_t *>(const_cast<const void *>(array->buffers[0]));
    if (pabyValidity == nullptr)
    {
        // No validity bitmap: there must not be any null.
        CPLAssert(array->null_count == 0);
        return;
    }

    const size_t nFirstBit = static_cast<size_t>(array->offset);
    const size_t nBitCount = static_cast<size_t>(array->length);
    size_t nCountedNulls = 0;
    for (size_t iBit = 0; iBit < nBitCount; ++iBit)
    {
        if (!TestBit(pabyValidity, iBit + nFirstBit))
            ++nCountedNulls;
    }
    CPLAssert(static_cast<size_t>(array->null_count) == nCountedNulls);
}
#endif
3314 :
3315 : /************************************************************************/
3316 : /* CompactValidityBuffer() */
3317 : /************************************************************************/
3318 :
3319 7682 : static void CompactValidityBuffer(
3320 : const struct ArrowSchema *, struct ArrowArray *array, size_t iStart,
3321 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3322 : {
3323 : // Invalidate null_count as the same validity buffer may be used when
3324 : // scrolling batches, and this creates confusion if we try to set it
3325 : // to different values among the batches
3326 7682 : if (array->null_count <= 0)
3327 : {
3328 4186 : array->null_count = -1;
3329 4186 : return;
3330 : }
3331 3496 : array->null_count = -1;
3332 :
3333 3496 : CPLAssert(static_cast<size_t>(array->length) >=
3334 : iStart + abyValidityFromFilters.size());
3335 3496 : uint8_t *pabyValidity =
3336 3496 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[0]));
3337 3496 : const size_t nLength = abyValidityFromFilters.size();
3338 3496 : const size_t nOffset = static_cast<size_t>(array->offset);
3339 3496 : size_t j = iStart + nOffset;
3340 12883 : for (size_t i = 0; i < nLength && j < nNewLength + nOffset; ++i)
3341 : {
3342 9387 : if (abyValidityFromFilters[i])
3343 : {
3344 5823 : if (TestBit(pabyValidity, i + iStart + nOffset))
3345 4387 : SetBit(pabyValidity, j);
3346 : else
3347 1436 : UnsetBit(pabyValidity, j);
3348 5823 : ++j;
3349 : }
3350 : }
3351 : }
3352 :
3353 : /************************************************************************/
3354 : /* CompactBoolArray() */
3355 : /************************************************************************/
3356 :
3357 224 : static void CompactBoolArray(const struct ArrowSchema *schema,
3358 : struct ArrowArray *array, size_t iStart,
3359 : const std::vector<bool> &abyValidityFromFilters,
3360 : size_t nNewLength)
3361 : {
3362 224 : CPLAssert(array->n_children == 0);
3363 224 : CPLAssert(array->n_buffers == 2);
3364 224 : CPLAssert(static_cast<size_t>(array->length) >=
3365 : iStart + abyValidityFromFilters.size());
3366 :
3367 224 : const size_t nLength = abyValidityFromFilters.size();
3368 224 : const size_t nOffset = static_cast<size_t>(array->offset);
3369 224 : uint8_t *pabyData =
3370 224 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[1]));
3371 224 : size_t j = iStart + nOffset;
3372 1147 : for (size_t i = 0; i < nLength; ++i)
3373 : {
3374 923 : if (abyValidityFromFilters[i])
3375 : {
3376 424 : if (TestBit(pabyData, i + iStart + nOffset))
3377 199 : SetBit(pabyData, j);
3378 : else
3379 225 : UnsetBit(pabyData, j);
3380 :
3381 424 : ++j;
3382 : }
3383 : }
3384 :
3385 224 : if (schema->flags & ARROW_FLAG_NULLABLE)
3386 224 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3387 : nNewLength);
3388 :
3389 224 : array->length = nNewLength;
3390 224 : }
3391 :
3392 : /************************************************************************/
3393 : /* CompactPrimitiveArray() */
3394 : /************************************************************************/
3395 :
3396 : template <class T>
3397 3575 : static void CompactPrimitiveArray(
3398 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3399 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3400 : {
3401 3575 : CPLAssert(array->n_children == 0);
3402 3575 : CPLAssert(array->n_buffers == 2);
3403 3575 : CPLAssert(static_cast<size_t>(array->length) >=
3404 : iStart + abyValidityFromFilters.size());
3405 :
3406 3575 : const size_t nLength = abyValidityFromFilters.size();
3407 3575 : const size_t nOffset = static_cast<size_t>(array->offset);
3408 3575 : T *paData =
3409 3575 : static_cast<T *>(const_cast<void *>(array->buffers[1])) + nOffset;
3410 3575 : size_t j = iStart;
3411 18134 : for (size_t i = 0; i < nLength; ++i)
3412 : {
3413 14559 : if (abyValidityFromFilters[i])
3414 : {
3415 6366 : paData[j] = paData[i + iStart];
3416 6366 : ++j;
3417 : }
3418 : }
3419 :
3420 3575 : if (schema->flags & ARROW_FLAG_NULLABLE)
3421 3564 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3422 : nNewLength);
3423 :
3424 3575 : array->length = nNewLength;
3425 3575 : }
3426 :
3427 : /************************************************************************/
3428 : /* CompactStringOrBinaryArray() */
3429 : /************************************************************************/
3430 :
3431 : template <class OffsetType>
3432 1187 : static void CompactStringOrBinaryArray(
3433 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3434 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3435 : {
3436 1187 : CPLAssert(array->n_children == 0);
3437 1187 : CPLAssert(array->n_buffers == 3);
3438 1187 : CPLAssert(static_cast<size_t>(array->length) >=
3439 : iStart + abyValidityFromFilters.size());
3440 :
3441 1187 : const size_t nLength = abyValidityFromFilters.size();
3442 1187 : const size_t nOffset = static_cast<size_t>(array->offset);
3443 1187 : OffsetType *panOffsets =
3444 1187 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3445 : nOffset;
3446 1187 : GByte *pabyData =
3447 1187 : static_cast<GByte *>(const_cast<void *>(array->buffers[2]));
3448 1187 : size_t j = iStart;
3449 1187 : OffsetType nCurOffset = panOffsets[iStart];
3450 5103 : for (size_t i = 0; i < nLength; ++i)
3451 : {
3452 3916 : if (abyValidityFromFilters[i])
3453 : {
3454 1768 : const auto nStartOffset = panOffsets[i + iStart];
3455 1768 : const auto nEndOffset = panOffsets[i + iStart + 1];
3456 1768 : panOffsets[j] = nCurOffset;
3457 1768 : const auto nSize = static_cast<size_t>(nEndOffset - nStartOffset);
3458 1768 : if (nSize)
3459 : {
3460 1562 : if (nCurOffset < nStartOffset)
3461 : {
3462 636 : memmove(pabyData + nCurOffset, pabyData + nStartOffset,
3463 : nSize);
3464 : }
3465 1562 : nCurOffset += static_cast<OffsetType>(nSize);
3466 : }
3467 1768 : ++j;
3468 : }
3469 : }
3470 1187 : panOffsets[j] = nCurOffset;
3471 :
3472 1187 : if (schema->flags & ARROW_FLAG_NULLABLE)
3473 806 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3474 : nNewLength);
3475 :
3476 1187 : array->length = nNewLength;
3477 1187 : }
3478 :
3479 : /************************************************************************/
3480 : /* CompactFixedWidthArray() */
3481 : /************************************************************************/
3482 :
3483 : static void
3484 305 : CompactFixedWidthArray(const struct ArrowSchema *schema,
3485 : struct ArrowArray *array, int nWidth, size_t iStart,
3486 : const std::vector<bool> &abyValidityFromFilters,
3487 : size_t nNewLength)
3488 : {
3489 305 : CPLAssert(array->n_children == 0);
3490 305 : CPLAssert(array->n_buffers == 2);
3491 305 : CPLAssert(static_cast<size_t>(array->length) >=
3492 : iStart + abyValidityFromFilters.size());
3493 :
3494 305 : const size_t nLength = abyValidityFromFilters.size();
3495 305 : const size_t nOffset = static_cast<size_t>(array->offset);
3496 305 : GByte *pabyData =
3497 305 : static_cast<GByte *>(const_cast<void *>(array->buffers[1]));
3498 305 : size_t nStartOffset = (iStart + nOffset) * nWidth;
3499 305 : size_t nCurOffset = nStartOffset;
3500 1133 : for (size_t i = 0; i < nLength; ++i, nStartOffset += nWidth)
3501 : {
3502 828 : if (abyValidityFromFilters[i])
3503 : {
3504 391 : if (nCurOffset < nStartOffset)
3505 : {
3506 210 : memcpy(pabyData + nCurOffset, pabyData + nStartOffset, nWidth);
3507 : }
3508 391 : nCurOffset += nWidth;
3509 : }
3510 : }
3511 :
3512 305 : if (schema->flags & ARROW_FLAG_NULLABLE)
3513 305 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3514 : nNewLength);
3515 :
3516 305 : array->length = nNewLength;
3517 305 : }
3518 :
3519 : /************************************************************************/
3520 : /* CompactStructArray() */
3521 : /************************************************************************/
3522 :
3523 : static bool CompactArray(const struct ArrowSchema *schema,
3524 : struct ArrowArray *array, size_t iStart,
3525 : const std::vector<bool> &abyValidityFromFilters,
3526 : size_t nNewLength);
3527 :
3528 665 : static bool CompactStructArray(const struct ArrowSchema *schema,
3529 : struct ArrowArray *array, size_t iStart,
3530 : const std::vector<bool> &abyValidityFromFilters,
3531 : size_t nNewLength)
3532 : {
3533 : // The equality might not be strict in the case of when some sub-arrays
3534 : // are fully void !
3535 665 : CPLAssert(array->n_children <= schema->n_children);
3536 6718 : for (int64_t iField = 0; iField < array->n_children; ++iField)
3537 : {
3538 6053 : const auto psChildSchema = schema->children[iField];
3539 6053 : const auto psChildArray = array->children[iField];
3540 : // To please Arrow validation...
3541 6053 : const size_t nChildNewLength =
3542 6053 : static_cast<size_t>(array->offset) + nNewLength;
3543 6053 : if (psChildArray->length > array->length)
3544 : {
3545 120 : std::vector<bool> abyChildValidity(abyValidityFromFilters);
3546 120 : abyChildValidity.resize(
3547 120 : abyValidityFromFilters.size() +
3548 120 : static_cast<size_t>(psChildArray->length - array->length),
3549 : false);
3550 120 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3551 : abyChildValidity, nChildNewLength))
3552 : {
3553 0 : return false;
3554 : }
3555 : }
3556 : else
3557 : {
3558 5933 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3559 : abyValidityFromFilters, nChildNewLength))
3560 : {
3561 0 : return false;
3562 : }
3563 : }
3564 6053 : CPLAssert(psChildArray->length ==
3565 : static_cast<int64_t>(nChildNewLength));
3566 : }
3567 :
3568 665 : if (schema->flags & ARROW_FLAG_NULLABLE)
3569 201 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3570 : nNewLength);
3571 :
3572 665 : array->length = nNewLength;
3573 :
3574 665 : return true;
3575 : }
3576 :
3577 : /************************************************************************/
3578 : /* InvalidateNullCountRec() */
3579 : /************************************************************************/
3580 :
3581 570 : static void InvalidateNullCountRec(const struct ArrowSchema *schema,
3582 : struct ArrowArray *array)
3583 : {
3584 570 : if (schema->flags & ARROW_FLAG_NULLABLE)
3585 210 : array->null_count = -1;
3586 960 : for (int i = 0; i < array->n_children; ++i)
3587 390 : InvalidateNullCountRec(schema->children[i], array->children[i]);
3588 570 : }
3589 :
3590 : /************************************************************************/
3591 : /* CompactListArray() */
3592 : /************************************************************************/
3593 :
3594 : template <class OffsetType>
3595 1773 : static bool CompactListArray(const struct ArrowSchema *schema,
3596 : struct ArrowArray *array, size_t iStart,
3597 : const std::vector<bool> &abyValidityFromFilters,
3598 : size_t nNewLength)
3599 : {
3600 1773 : CPLAssert(static_cast<size_t>(array->length) >=
3601 : iStart + abyValidityFromFilters.size());
3602 1773 : CPLAssert(array->n_children == 1);
3603 1773 : CPLAssert(array->n_buffers == 2);
3604 :
3605 1773 : const auto psChildSchema = schema->children[0];
3606 1773 : const auto psChildArray = array->children[0];
3607 :
3608 1773 : const size_t nLength = abyValidityFromFilters.size();
3609 1773 : const size_t nOffset = static_cast<size_t>(array->offset);
3610 1773 : OffsetType *panOffsets =
3611 1773 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3612 : nOffset;
3613 :
3614 1773 : if (panOffsets[iStart + nLength] > panOffsets[iStart])
3615 : {
3616 3186 : std::vector<bool> abyChildValidity(
3617 1593 : static_cast<size_t>(panOffsets[iStart + nLength] -
3618 1593 : panOffsets[iStart]),
3619 : true);
3620 1593 : size_t j = iStart;
3621 1593 : OffsetType nCurOffset = panOffsets[iStart];
3622 6694 : for (size_t i = 0; i < nLength; ++i)
3623 : {
3624 5101 : if (abyValidityFromFilters[i])
3625 : {
3626 2142 : const auto nSize =
3627 2142 : panOffsets[i + iStart + 1] - panOffsets[i + iStart];
3628 2142 : panOffsets[j] = nCurOffset;
3629 2142 : nCurOffset += nSize;
3630 2142 : ++j;
3631 : }
3632 : else
3633 : {
3634 2959 : const auto nStartOffset = panOffsets[i + iStart];
3635 2959 : const auto nEndOffset = panOffsets[i + iStart + 1];
3636 2959 : if (nStartOffset != nEndOffset)
3637 : {
3638 3073 : if (nStartOffset >=
3639 1538 : panOffsets[iStart] + abyChildValidity.size())
3640 : {
3641 : // shouldn't happen in sane arrays...
3642 0 : CPLError(CE_Failure, CPLE_AppDefined,
3643 : "nStartOffset >= panOffsets[iStart] + "
3644 : "abyChildValidity.size()");
3645 0 : return false;
3646 : }
3647 : // nEndOffset might be equal to abyChildValidity.size()
3648 3073 : if (nEndOffset >
3649 1538 : panOffsets[iStart] + abyChildValidity.size())
3650 : {
3651 : // shouldn't happen in sane arrays...
3652 0 : CPLError(CE_Failure, CPLE_AppDefined,
3653 : "nEndOffset > panOffsets[iStart] + "
3654 : "abyChildValidity.size()");
3655 0 : return false;
3656 : }
3657 1538 : for (auto k = nStartOffset - panOffsets[iStart];
3658 4652 : k < nEndOffset - panOffsets[iStart]; ++k)
3659 3114 : abyChildValidity[static_cast<size_t>(k)] = false;
3660 : }
3661 : }
3662 : }
3663 1593 : panOffsets[j] = nCurOffset;
3664 1593 : const size_t nChildNewLength = static_cast<size_t>(panOffsets[j]);
3665 : // To please Arrow validation
3666 4552 : for (; j < iStart + nLength; ++j)
3667 2959 : panOffsets[j] = nCurOffset;
3668 :
3669 1593 : if (!CompactArray(psChildSchema, psChildArray,
3670 1593 : static_cast<size_t>(panOffsets[iStart]),
3671 : abyChildValidity, nChildNewLength))
3672 0 : return false;
3673 :
3674 1593 : CPLAssert(psChildArray->length ==
3675 : static_cast<int64_t>(nChildNewLength));
3676 : }
3677 : else
3678 : {
3679 180 : InvalidateNullCountRec(psChildSchema, psChildArray);
3680 : }
3681 :
3682 1773 : if (schema->flags & ARROW_FLAG_NULLABLE)
3683 1773 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3684 : nNewLength);
3685 :
3686 1773 : array->length = nNewLength;
3687 :
3688 1773 : return true;
3689 : }
3690 :
3691 : /************************************************************************/
3692 : /* CompactFixedSizeListArray() */
3693 : /************************************************************************/
3694 :
3695 : static bool
3696 809 : CompactFixedSizeListArray(const struct ArrowSchema *schema,
3697 : struct ArrowArray *array, size_t N, size_t iStart,
3698 : const std::vector<bool> &abyValidityFromFilters,
3699 : size_t nNewLength)
3700 : {
3701 809 : CPLAssert(static_cast<size_t>(array->length) >=
3702 : iStart + abyValidityFromFilters.size());
3703 809 : CPLAssert(array->n_children == 1);
3704 :
3705 809 : const auto psChildSchema = schema->children[0];
3706 809 : const auto psChildArray = array->children[0];
3707 :
3708 809 : const size_t nLength = abyValidityFromFilters.size();
3709 809 : const size_t nOffset = static_cast<size_t>(array->offset);
3710 1618 : std::vector<bool> abyChildValidity(N * nLength, true);
3711 809 : size_t nChildNewLength = (iStart + nOffset) * N;
3712 809 : size_t nSrcLength = 0;
3713 3198 : for (size_t i = 0; i < nLength; ++i)
3714 : {
3715 2389 : if (abyValidityFromFilters[i])
3716 : {
3717 1015 : nChildNewLength += N;
3718 1015 : nSrcLength++;
3719 : }
3720 : else
3721 : {
3722 1374 : const size_t nStartOffset = i * N;
3723 1374 : const size_t nEndOffset = (i + 1) * N;
3724 4122 : for (size_t k = nStartOffset; k < nEndOffset; ++k)
3725 2748 : abyChildValidity[k] = false;
3726 : }
3727 : }
3728 809 : CPL_IGNORE_RET_VAL(nSrcLength);
3729 809 : CPLAssert(iStart + nSrcLength == nNewLength);
3730 :
3731 809 : if (!CompactArray(psChildSchema, psChildArray, (iStart + nOffset) * N,
3732 : abyChildValidity, nChildNewLength))
3733 0 : return false;
3734 :
3735 809 : if (schema->flags & ARROW_FLAG_NULLABLE)
3736 809 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3737 : nNewLength);
3738 :
3739 809 : array->length = nNewLength;
3740 :
3741 809 : CPLAssert(psChildArray->length >=
3742 : static_cast<int64_t>(N) * (array->length + array->offset));
3743 :
3744 809 : return true;
3745 : }
3746 :
3747 : /************************************************************************/
3748 : /* CompactMapArray() */
3749 : /************************************************************************/
3750 :
3751 561 : static bool CompactMapArray(const struct ArrowSchema *schema,
3752 : struct ArrowArray *array, size_t iStart,
3753 : const std::vector<bool> &abyValidityFromFilters,
3754 : size_t nNewLength)
3755 : {
3756 561 : return CompactListArray<uint32_t>(schema, array, iStart,
3757 561 : abyValidityFromFilters, nNewLength);
3758 : }
3759 :
3760 : /************************************************************************/
3761 : /* CompactArray() */
3762 : /************************************************************************/
3763 :
3764 8455 : static bool CompactArray(const struct ArrowSchema *schema,
3765 : struct ArrowArray *array, size_t iStart,
3766 : const std::vector<bool> &abyValidityFromFilters,
3767 : size_t nNewLength)
3768 : {
3769 8455 : const char *format = schema->format;
3770 :
3771 8455 : if (IsStructure(format))
3772 : {
3773 582 : if (!CompactStructArray(schema, array, iStart, abyValidityFromFilters,
3774 : nNewLength))
3775 0 : return false;
3776 : }
3777 7873 : else if (IsList(format))
3778 : {
3779 1209 : if (!CompactListArray<uint32_t>(schema, array, iStart,
3780 : abyValidityFromFilters, nNewLength))
3781 0 : return false;
3782 : }
3783 6664 : else if (IsLargeList(format))
3784 : {
3785 3 : if (!CompactListArray<uint64_t>(schema, array, iStart,
3786 : abyValidityFromFilters, nNewLength))
3787 0 : return false;
3788 : }
3789 6661 : else if (IsMap(format))
3790 : {
3791 561 : if (!CompactMapArray(schema, array, iStart, abyValidityFromFilters,
3792 : nNewLength))
3793 0 : return false;
3794 : }
3795 6100 : else if (IsFixedSizeList(format))
3796 : {
3797 809 : const int N = GetFixedSizeList(format);
3798 809 : if (N <= 0)
3799 0 : return false;
3800 809 : if (!CompactFixedSizeListArray(schema, array, static_cast<size_t>(N),
3801 : iStart, abyValidityFromFilters,
3802 : nNewLength))
3803 0 : return false;
3804 : }
3805 5291 : else if (IsBoolean(format))
3806 : {
3807 224 : CompactBoolArray(schema, array, iStart, abyValidityFromFilters,
3808 : nNewLength);
3809 : }
3810 5067 : else if (IsInt8(format) || IsUInt8(format))
3811 : {
3812 444 : CompactPrimitiveArray<uint8_t>(schema, array, iStart,
3813 : abyValidityFromFilters, nNewLength);
3814 : }
3815 4623 : else if (IsInt16(format) || IsUInt16(format) || IsFloat16(format))
3816 : {
3817 458 : CompactPrimitiveArray<uint16_t>(schema, array, iStart,
3818 : abyValidityFromFilters, nNewLength);
3819 : }
3820 8037 : else if (IsInt32(format) || IsUInt32(format) || IsFloat32(format) ||
3821 11539 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
3822 3502 : strcmp(format, "ttm") == 0)
3823 : {
3824 794 : CompactPrimitiveArray<uint32_t>(schema, array, iStart,
3825 : abyValidityFromFilters, nNewLength);
3826 : }
3827 6023 : else if (IsInt64(format) || IsUInt64(format) || IsFloat64(format) ||
3828 1997 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
3829 6023 : strcmp(format, "ttn") == 0 || strncmp(format, "ts", 2) == 0)
3830 : {
3831 1879 : CompactPrimitiveArray<uint64_t>(schema, array, iStart,
3832 : abyValidityFromFilters, nNewLength);
3833 : }
3834 1492 : else if (IsString(format) || IsBinary(format))
3835 : {
3836 983 : CompactStringOrBinaryArray<uint32_t>(
3837 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3838 : }
3839 509 : else if (IsLargeString(format) || IsLargeBinary(format))
3840 : {
3841 204 : CompactStringOrBinaryArray<uint64_t>(
3842 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3843 : }
3844 305 : else if (IsFixedWidthBinary(format))
3845 : {
3846 67 : const int nWidth = GetFixedWithBinary(format);
3847 67 : CompactFixedWidthArray(schema, array, nWidth, iStart,
3848 : abyValidityFromFilters, nNewLength);
3849 : }
3850 238 : else if (IsDecimal(format))
3851 : {
3852 238 : int nPrecision = 0;
3853 238 : int nScale = 0;
3854 238 : int nWidthInBytes = 0;
3855 238 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
3856 : {
3857 0 : CPLError(CE_Failure, CPLE_AppDefined,
3858 : "Unexpected error in PostFilterArrowArray(): unhandled "
3859 : "field format: %s",
3860 : format);
3861 :
3862 0 : return false;
3863 : }
3864 238 : CompactFixedWidthArray(schema, array, nWidthInBytes, iStart,
3865 : abyValidityFromFilters, nNewLength);
3866 : }
3867 : else
3868 : {
3869 0 : CPLError(CE_Failure, CPLE_AppDefined,
3870 : "Unexpected error in CompactArray(): unhandled "
3871 : "field format: %s",
3872 : format);
3873 0 : return false;
3874 : }
3875 :
3876 8455 : return true;
3877 : }
3878 :
3879 : /************************************************************************/
3880 : /* FillValidityArrayFromWKBArray() */
3881 : /************************************************************************/
3882 :
3883 : template <class OffsetType>
3884 : static size_t
3885 21 : FillValidityArrayFromWKBArray(struct ArrowArray *array, const OGRLayer *poLayer,
3886 : std::vector<bool> &abyValidityFromFilters)
3887 : {
3888 21 : const size_t nLength = static_cast<size_t>(array->length);
3889 14 : const uint8_t *pabyValidity =
3890 21 : array->null_count == 0
3891 : ? nullptr
3892 7 : : static_cast<const uint8_t *>(array->buffers[0]);
3893 21 : const size_t nOffset = static_cast<size_t>(array->offset);
3894 21 : const OffsetType *panOffsets =
3895 21 : static_cast<const OffsetType *>(array->buffers[1]) + nOffset;
3896 21 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
3897 21 : OGREnvelope sEnvelope;
3898 21 : abyValidityFromFilters.resize(nLength);
3899 21 : size_t nCountIntersecting = 0;
3900 138 : for (size_t i = 0; i < nLength; ++i)
3901 : {
3902 117 : if (!pabyValidity || TestBit(pabyValidity, i + nOffset))
3903 : {
3904 110 : const GByte *pabyWKB = pabyData + panOffsets[i];
3905 110 : const size_t nWKBSize =
3906 110 : static_cast<size_t>(panOffsets[i + 1] - panOffsets[i]);
3907 110 : if (poLayer->FilterWKBGeometry(pabyWKB, nWKBSize,
3908 : /* bEnvelopeAlreadySet=*/false,
3909 : sEnvelope))
3910 : {
3911 29 : abyValidityFromFilters[i] = true;
3912 29 : nCountIntersecting++;
3913 : }
3914 : }
3915 : }
3916 21 : return nCountIntersecting;
3917 : }
3918 :
3919 : /************************************************************************/
3920 : /* ArrowTimestampToOGRDateTime() */
3921 : /************************************************************************/
3922 :
3923 107 : static void ArrowTimestampToOGRDateTime(int64_t nTimestamp,
3924 : int nInvFactorToSecond,
3925 : const char *pszTZ, OGRFeature &oFeature,
3926 : int iField)
3927 : {
3928 107 : double floatingPart = 0;
3929 107 : if (nInvFactorToSecond)
3930 : {
3931 107 : floatingPart =
3932 107 : (nTimestamp % nInvFactorToSecond) / double(nInvFactorToSecond);
3933 107 : nTimestamp /= nInvFactorToSecond;
3934 : }
3935 107 : int nTZFlag = 0;
3936 107 : const size_t nTZLen = strlen(pszTZ);
3937 107 : if ((nTZLen == 3 && strcmp(pszTZ, "UTC") == 0) ||
3938 0 : (nTZLen == 7 && strcmp(pszTZ, "Etc/UTC") == 0))
3939 : {
3940 17 : nTZFlag = 100;
3941 : }
3942 90 : else if (nTZLen == 6 && (pszTZ[0] == '+' || pszTZ[0] == '-') &&
3943 33 : pszTZ[3] == ':')
3944 : {
3945 33 : int nTZHour = atoi(pszTZ + 1);
3946 33 : int nTZMin = atoi(pszTZ + 4);
3947 33 : if (nTZHour >= 0 && nTZHour <= 14 && nTZMin >= 0 && nTZMin < 60 &&
3948 33 : (nTZMin % 15) == 0)
3949 : {
3950 33 : nTZFlag = (nTZHour * 4) + (nTZMin / 15);
3951 33 : if (pszTZ[0] == '+')
3952 : {
3953 24 : nTZFlag = 100 + nTZFlag;
3954 24 : nTimestamp += nTZHour * 3600 + nTZMin * 60;
3955 : }
3956 : else
3957 : {
3958 9 : nTZFlag = 100 - nTZFlag;
3959 9 : nTimestamp -= nTZHour * 3600 + nTZMin * 60;
3960 : }
3961 : }
3962 : }
3963 : struct tm dt;
3964 107 : CPLUnixTimeToYMDHMS(nTimestamp, &dt);
3965 107 : oFeature.SetField(iField, dt.tm_year + 1900, dt.tm_mon + 1, dt.tm_mday,
3966 : dt.tm_hour, dt.tm_min,
3967 107 : static_cast<float>(dt.tm_sec + floatingPart), nTZFlag);
3968 107 : }
3969 :
3970 : /************************************************************************/
3971 : /* BuildMapFieldNameToArrowPath() */
3972 : /************************************************************************/
3973 :
3974 : static void
3975 334 : BuildMapFieldNameToArrowPath(const struct ArrowSchema *schema,
3976 : std::map<std::string, std::vector<int>> &oMap,
3977 : const std::string &osPrefix,
3978 : std::vector<int> &anArrowPath)
3979 : {
3980 7833 : for (int64_t i = 0; i < schema->n_children; ++i)
3981 : {
3982 7499 : auto psChild = schema->children[i];
3983 7499 : anArrowPath.push_back(static_cast<int>(i));
3984 7499 : if (IsStructure(psChild->format))
3985 : {
3986 400 : std::string osNewPrefix(osPrefix);
3987 200 : osNewPrefix += psChild->name;
3988 200 : osNewPrefix += ".";
3989 200 : BuildMapFieldNameToArrowPath(psChild, oMap, osNewPrefix,
3990 : anArrowPath);
3991 : }
3992 : else
3993 : {
3994 7299 : oMap[osPrefix + psChild->name] = anArrowPath;
3995 : }
3996 7499 : anArrowPath.pop_back();
3997 : }
3998 334 : }
3999 :
4000 : /************************************************************************/
4001 : /* FillFieldList() */
4002 : /************************************************************************/
4003 :
4004 : template <typename ListOffsetType, typename ArrowType,
4005 : typename OGRType = ArrowType>
4006 167 : inline static void FillFieldList(const struct ArrowArray *array,
4007 : int iOGRFieldIdx, size_t nOffsettedIndex,
4008 : const struct ArrowArray *childArray,
4009 : OGRFeature &oFeature)
4010 : {
4011 167 : const auto panOffsets =
4012 167 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4013 : nOffsettedIndex;
4014 334 : std::vector<OGRType> aValues;
4015 167 : const auto *paValues =
4016 167 : static_cast<const ArrowType *>(childArray->buffers[1]);
4017 167 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4018 509 : i < static_cast<size_t>(panOffsets[1]); ++i)
4019 : {
4020 342 : aValues.push_back(static_cast<OGRType>(paValues[i]));
4021 : }
4022 167 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4023 : aValues.data());
4024 167 : }
4025 :
4026 : /************************************************************************/
4027 : /* FillFieldListFromBool() */
4028 : /************************************************************************/
4029 :
4030 : template <typename ListOffsetType>
4031 : inline static void
4032 16 : FillFieldListFromBool(const struct ArrowArray *array, int iOGRFieldIdx,
4033 : size_t nOffsettedIndex,
4034 : const struct ArrowArray *childArray, OGRFeature &oFeature)
4035 : {
4036 16 : const auto panOffsets =
4037 16 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4038 : nOffsettedIndex;
4039 32 : std::vector<int> aValues;
4040 16 : const auto *paValues = static_cast<const uint8_t *>(childArray->buffers[1]);
4041 16 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4042 48 : i < static_cast<size_t>(panOffsets[1]); ++i)
4043 : {
4044 32 : aValues.push_back(TestBit(paValues, i) ? 1 : 0);
4045 : }
4046 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4047 16 : aValues.data());
4048 16 : }
4049 :
4050 : /************************************************************************/
4051 : /* FillFieldListFromHalfFloat() */
4052 : /************************************************************************/
4053 :
4054 : template <typename ListOffsetType>
4055 8 : inline static void FillFieldListFromHalfFloat(
4056 : const struct ArrowArray *array, int iOGRFieldIdx, size_t nOffsettedIndex,
4057 : const struct ArrowArray *childArray, OGRFeature &oFeature)
4058 : {
4059 8 : const auto panOffsets =
4060 8 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4061 : nOffsettedIndex;
4062 16 : std::vector<double> aValues;
4063 8 : const auto *paValues =
4064 8 : static_cast<const uint16_t *>(childArray->buffers[1]);
4065 8 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4066 24 : i < static_cast<size_t>(panOffsets[1]); ++i)
4067 : {
4068 16 : const auto nFloat16AsUInt32 = CPLHalfToFloat(paValues[i]);
4069 : float f;
4070 16 : memcpy(&f, &nFloat16AsUInt32, sizeof(f));
4071 16 : aValues.push_back(static_cast<double>(f));
4072 : }
4073 8 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4074 8 : aValues.data());
4075 8 : }
4076 :
4077 : /************************************************************************/
4078 : /* FillFieldListFromString() */
4079 : /************************************************************************/
4080 :
4081 : template <typename ListOffsetType, typename StringOffsetType>
4082 32 : inline static void FillFieldListFromString(const struct ArrowArray *array,
4083 : int iOGRFieldIdx,
4084 : size_t nOffsettedIndex,
4085 : const struct ArrowArray *childArray,
4086 : OGRFeature &oFeature)
4087 : {
4088 32 : const auto panOffsets =
4089 32 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4090 : nOffsettedIndex;
4091 64 : CPLStringList aosVals;
4092 32 : const auto panSubOffsets =
4093 32 : static_cast<const StringOffsetType *>(childArray->buffers[1]);
4094 32 : const char *pszValues = static_cast<const char *>(childArray->buffers[2]);
4095 64 : std::string osTmp;
4096 90 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4097 90 : i < static_cast<size_t>(panOffsets[1]); ++i)
4098 : {
4099 58 : osTmp.assign(
4100 58 : pszValues + panSubOffsets[i],
4101 58 : static_cast<size_t>(panSubOffsets[i + 1] - panSubOffsets[i]));
4102 58 : aosVals.AddString(osTmp.c_str());
4103 : }
4104 32 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
4105 32 : }
4106 :
4107 : /************************************************************************/
4108 : /* FillFieldFixedSizeList() */
4109 : /************************************************************************/
4110 :
4111 : template <typename ArrowType, typename OGRType = ArrowType>
4112 120 : inline static void FillFieldFixedSizeList(
4113 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
4114 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
4115 : {
4116 240 : std::vector<OGRType> aValues;
4117 120 : const auto *paValues =
4118 120 : static_cast<const ArrowType *>(childArray->buffers[1]) +
4119 120 : childArray->offset + nOffsettedIndex * nItems;
4120 360 : for (int i = 0; i < nItems; ++i)
4121 : {
4122 240 : aValues.push_back(static_cast<OGRType>(paValues[i]));
4123 : }
4124 120 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4125 : aValues.data());
4126 120 : }
4127 :
4128 : /************************************************************************/
4129 : /* FillFieldFixedSizeListString() */
4130 : /************************************************************************/
4131 :
4132 : template <typename StringOffsetType>
4133 17 : inline static void FillFieldFixedSizeListString(
4134 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
4135 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
4136 : {
4137 34 : CPLStringList aosVals;
4138 17 : const auto panSubOffsets =
4139 17 : static_cast<const StringOffsetType *>(childArray->buffers[1]) +
4140 17 : childArray->offset + nOffsettedIndex * nItems;
4141 17 : const char *pszValues = static_cast<const char *>(childArray->buffers[2]);
4142 34 : std::string osTmp;
4143 51 : for (int i = 0; i < nItems; ++i)
4144 : {
4145 34 : osTmp.assign(
4146 34 : pszValues + panSubOffsets[i],
4147 34 : static_cast<size_t>(panSubOffsets[i + 1] - panSubOffsets[i]));
4148 34 : aosVals.AddString(osTmp.c_str());
4149 : }
4150 17 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
4151 17 : }
4152 :
4153 : /************************************************************************/
4154 : /* GetValue() */
4155 : /************************************************************************/
4156 :
4157 : template <typename ArrowType>
4158 245 : inline static ArrowType GetValue(const struct ArrowArray *array,
4159 : size_t iFeature)
4160 : {
4161 245 : const auto *panValues = static_cast<const ArrowType *>(array->buffers[1]);
4162 245 : return panValues[iFeature + array->offset];
4163 : }
4164 :
4165 12 : template <> bool GetValue<bool>(const struct ArrowArray *array, size_t iFeature)
4166 : {
4167 12 : const auto *pabyValues = static_cast<const uint8_t *>(array->buffers[1]);
4168 12 : return TestBit(pabyValues, iFeature + static_cast<size_t>(array->offset));
4169 : }
4170 :
4171 : /************************************************************************/
4172 : /* GetValueFloat16() */
4173 : /************************************************************************/
4174 :
4175 23 : static float GetValueFloat16(const struct ArrowArray *array, const size_t nIdx)
4176 : {
4177 23 : const auto *panValues = static_cast<const uint16_t *>(array->buffers[1]);
4178 : const auto nFloat16AsUInt32 =
4179 23 : CPLHalfToFloat(panValues[nIdx + array->offset]);
4180 : float f;
4181 23 : memcpy(&f, &nFloat16AsUInt32, sizeof(f));
4182 23 : return f;
4183 : }
4184 :
4185 : /************************************************************************/
4186 : /* GetValueDecimal() */
4187 : /************************************************************************/
4188 :
4189 71 : static double GetValueDecimal(const struct ArrowArray *array,
4190 : const int nWidthIn64BitWord, const int nScale,
4191 : const size_t nIdx)
4192 : {
4193 : #ifdef CPL_LSB
4194 71 : const auto nIdxIn64BitWord = nIdx * nWidthIn64BitWord;
4195 : #else
4196 : const auto nIdxIn64BitWord =
4197 : nIdx * nWidthIn64BitWord + nWidthIn64BitWord - 1;
4198 : #endif
4199 71 : const auto *panValues = static_cast<const int64_t *>(array->buffers[1]);
4200 71 : const auto nVal =
4201 71 : panValues[nIdxIn64BitWord + array->offset * nWidthIn64BitWord];
4202 71 : return static_cast<double>(nVal) * std::pow(10.0, -nScale);
4203 : }
4204 :
4205 : /************************************************************************/
4206 : /* GetString() */
4207 : /************************************************************************/
4208 :
/** Return the string of row nIdx of a string array.
 *
 * @tparam OffsetType Type of the offsets stored in array->buffers[1].
 * @param array String array (array->offset is applied).
 * @param nIdx Index of the row.
 */
template <class OffsetType>
static std::string GetString(const struct ArrowArray *array, const size_t nIdx)
{
    const char *const pachData = static_cast<const char *>(array->buffers[2]);
    const OffsetType *const panRowOffsets =
        static_cast<const OffsetType *>(array->buffers[1]) +
        static_cast<size_t>(array->offset) + nIdx;

    const size_t nBegin = static_cast<size_t>(panRowOffsets[0]);
    const size_t nByteCount =
        static_cast<size_t>(panRowOffsets[1] - panRowOffsets[0]);
    return std::string(pachData + nBegin, nByteCount);
}
4221 :
4222 : /************************************************************************/
4223 : /* GetBinaryAsBase64() */
4224 : /************************************************************************/
4225 :
4226 : template <class OffsetType>
4227 8 : static std::string GetBinaryAsBase64(const struct ArrowArray *array,
4228 : const size_t nIdx)
4229 : {
4230 8 : const OffsetType *panOffsets =
4231 8 : static_cast<const OffsetType *>(array->buffers[1]) +
4232 8 : static_cast<size_t>(array->offset) + nIdx;
4233 8 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
4234 8 : const size_t nLen = static_cast<size_t>(panOffsets[1] - panOffsets[0]);
4235 8 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
4236 : {
4237 0 : CPLError(CE_Failure, CPLE_AppDefined, "Too large binary");
4238 0 : return std::string();
4239 : }
4240 16 : char *pszVal = CPLBase64Encode(
4241 8 : static_cast<int>(nLen), pabyData + static_cast<size_t>(panOffsets[0]));
4242 16 : std::string osStr(pszVal);
4243 8 : CPLFree(pszVal);
4244 8 : return osStr;
4245 : }
4246 :
4247 : /************************************************************************/
4248 : /* GetValueFixedWithBinaryAsBase64() */
4249 : /************************************************************************/
4250 :
4251 : static std::string
4252 4 : GetValueFixedWithBinaryAsBase64(const struct ArrowArray *array,
4253 : const int nWidth, const size_t nIdx)
4254 : {
4255 4 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[1]);
4256 8 : char *pszVal = CPLBase64Encode(
4257 : nWidth,
4258 4 : pabyData + (static_cast<size_t>(array->offset) + nIdx) * nWidth);
4259 4 : std::string osStr(pszVal);
4260 4 : CPLFree(pszVal);
4261 4 : return osStr;
4262 : }
4263 :
4264 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4265 : const struct ArrowArray *array,
4266 : const size_t nIdx);
4267 :
4268 : /************************************************************************/
4269 : /* AddToArray() */
4270 : /************************************************************************/
4271 :
4272 142 : static void AddToArray(CPLJSONArray &oArray, const struct ArrowSchema *schema,
4273 : const struct ArrowArray *array, const size_t nIdx)
4274 : {
4275 142 : if (IsBoolean(schema->format))
4276 7 : oArray.Add(GetValue<bool>(array, nIdx));
4277 135 : else if (IsUInt8(schema->format))
4278 13 : oArray.Add(GetValue<uint8_t>(array, nIdx));
4279 122 : else if (IsInt8(schema->format))
4280 7 : oArray.Add(GetValue<int8_t>(array, nIdx));
4281 115 : else if (IsUInt16(schema->format))
4282 7 : oArray.Add(GetValue<uint16_t>(array, nIdx));
4283 108 : else if (IsInt16(schema->format))
4284 7 : oArray.Add(GetValue<int16_t>(array, nIdx));
4285 101 : else if (IsUInt32(schema->format))
4286 7 : oArray.Add(static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
4287 94 : else if (IsInt32(schema->format))
4288 7 : oArray.Add(GetValue<int32_t>(array, nIdx));
4289 87 : else if (IsUInt64(schema->format))
4290 7 : oArray.Add(GetValue<uint64_t>(array, nIdx));
4291 80 : else if (IsInt64(schema->format))
4292 7 : oArray.Add(static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
4293 73 : else if (IsFloat16(schema->format))
4294 7 : oArray.Add(static_cast<double>(GetValueFloat16(array, nIdx)));
4295 66 : else if (IsFloat32(schema->format))
4296 7 : oArray.Add(static_cast<double>(GetValue<float>(array, nIdx)));
4297 59 : else if (IsFloat64(schema->format))
4298 7 : oArray.Add(GetValue<double>(array, nIdx));
4299 52 : else if (IsString(schema->format))
4300 13 : oArray.Add(GetString<uint32_t>(array, nIdx));
4301 39 : else if (IsLargeString(schema->format))
4302 4 : oArray.Add(GetString<uint64_t>(array, nIdx));
4303 35 : else if (IsBinary(schema->format))
4304 2 : oArray.Add(GetBinaryAsBase64<uint32_t>(array, nIdx));
4305 33 : else if (IsLargeBinary(schema->format))
4306 2 : oArray.Add(GetBinaryAsBase64<uint64_t>(array, nIdx));
4307 31 : else if (IsFixedWidthBinary(schema->format))
4308 2 : oArray.Add(GetValueFixedWithBinaryAsBase64(
4309 2 : array, GetFixedWithBinary(schema->format), nIdx));
4310 29 : else if (IsDecimal(schema->format))
4311 : {
4312 7 : int nPrecision = 0;
4313 7 : int nScale = 0;
4314 7 : int nWidthInBytes = 0;
4315 7 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
4316 7 : nWidthInBytes);
4317 : // Already validated
4318 7 : CPLAssert(bOK);
4319 7 : CPL_IGNORE_RET_VAL(bOK);
4320 7 : oArray.Add(GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
4321 : }
4322 : else
4323 22 : oArray.Add(GetObjectAsJSON(schema, array, nIdx));
4324 142 : }
4325 :
4326 : /************************************************************************/
4327 : /* GetListAsJSON() */
4328 : /************************************************************************/
4329 :
4330 : template <class OffsetType>
4331 112 : static CPLJSONArray GetListAsJSON(const struct ArrowSchema *schema,
4332 : const struct ArrowArray *array,
4333 : const size_t nIdx)
4334 : {
4335 112 : CPLJSONArray oArray;
4336 112 : const auto panOffsets = static_cast<const OffsetType *>(array->buffers[1]) +
4337 112 : array->offset + nIdx;
4338 112 : const auto childSchema = schema->children[0];
4339 112 : const auto childArray = array->children[0];
4340 5 : const uint8_t *pabyValidity =
4341 112 : childArray->null_count == 0
4342 : ? nullptr
4343 107 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4344 278 : for (size_t k = static_cast<size_t>(panOffsets[0]);
4345 278 : k < static_cast<size_t>(panOffsets[1]); k++)
4346 : {
4347 318 : if (!pabyValidity ||
4348 152 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4349 : {
4350 136 : AddToArray(oArray, childSchema, childArray, k);
4351 : }
4352 : else
4353 : {
4354 30 : oArray.AddNull();
4355 : }
4356 : }
4357 112 : return oArray;
4358 : }
4359 :
4360 : /************************************************************************/
4361 : /* GetFixedSizeListAsJSON() */
4362 : /************************************************************************/
4363 :
4364 3 : static CPLJSONArray GetFixedSizeListAsJSON(const struct ArrowSchema *schema,
4365 : const struct ArrowArray *array,
4366 : const size_t nIdx)
4367 : {
4368 3 : CPLJSONArray oArray;
4369 3 : const int nVals = GetFixedSizeList(schema->format);
4370 3 : const auto childSchema = schema->children[0];
4371 3 : const auto childArray = array->children[0];
4372 3 : const uint8_t *pabyValidity =
4373 3 : childArray->null_count == 0
4374 3 : ? nullptr
4375 3 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4376 9 : for (size_t k = nIdx * nVals; k < (nIdx + 1) * nVals; k++)
4377 : {
4378 12 : if (!pabyValidity ||
4379 6 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4380 : {
4381 6 : AddToArray(oArray, childSchema, childArray, k);
4382 : }
4383 : else
4384 : {
4385 0 : oArray.AddNull();
4386 : }
4387 : }
4388 3 : return oArray;
4389 : }
4390 :
4391 : /************************************************************************/
4392 : /* AddToDict() */
4393 : /************************************************************************/
4394 :
4395 198 : static void AddToDict(CPLJSONObject &oDict, const std::string &osKey,
4396 : const struct ArrowSchema *schema,
4397 : const struct ArrowArray *array, const size_t nIdx)
4398 : {
4399 198 : if (IsBoolean(schema->format))
4400 5 : oDict.Add(osKey, GetValue<bool>(array, nIdx));
4401 193 : else if (IsUInt8(schema->format))
4402 5 : oDict.Add(osKey, GetValue<uint8_t>(array, nIdx));
4403 188 : else if (IsInt8(schema->format))
4404 5 : oDict.Add(osKey, GetValue<int8_t>(array, nIdx));
4405 183 : else if (IsUInt16(schema->format))
4406 5 : oDict.Add(osKey, GetValue<uint16_t>(array, nIdx));
4407 178 : else if (IsInt16(schema->format))
4408 5 : oDict.Add(osKey, GetValue<int16_t>(array, nIdx));
4409 173 : else if (IsUInt32(schema->format))
4410 2 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
4411 171 : else if (IsInt32(schema->format))
4412 6 : oDict.Add(osKey, GetValue<int32_t>(array, nIdx));
4413 165 : else if (IsUInt64(schema->format))
4414 5 : oDict.Add(osKey, GetValue<uint64_t>(array, nIdx));
4415 160 : else if (IsInt64(schema->format))
4416 22 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
4417 138 : else if (IsFloat16(schema->format))
4418 2 : oDict.Add(osKey, static_cast<double>(GetValueFloat16(array, nIdx)));
4419 136 : else if (IsFloat32(schema->format))
4420 5 : oDict.Add(osKey, static_cast<double>(GetValue<float>(array, nIdx)));
4421 131 : else if (IsFloat64(schema->format))
4422 19 : oDict.Add(osKey, GetValue<double>(array, nIdx));
4423 112 : else if (IsString(schema->format))
4424 14 : oDict.Add(osKey, GetString<uint32_t>(array, nIdx));
4425 98 : else if (IsLargeString(schema->format))
4426 2 : oDict.Add(osKey, GetString<uint64_t>(array, nIdx));
4427 96 : else if (IsBinary(schema->format))
4428 2 : oDict.Add(osKey, GetBinaryAsBase64<uint32_t>(array, nIdx));
4429 94 : else if (IsLargeBinary(schema->format))
4430 2 : oDict.Add(osKey, GetBinaryAsBase64<uint64_t>(array, nIdx));
4431 92 : else if (IsFixedWidthBinary(schema->format))
4432 2 : oDict.Add(osKey, GetValueFixedWithBinaryAsBase64(
4433 2 : array, GetFixedWithBinary(schema->format), nIdx));
4434 90 : else if (IsDecimal(schema->format))
4435 : {
4436 8 : int nPrecision = 0;
4437 8 : int nScale = 0;
4438 8 : int nWidthInBytes = 0;
4439 8 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
4440 8 : nWidthInBytes);
4441 : // Already validated
4442 8 : CPLAssert(bOK);
4443 8 : CPL_IGNORE_RET_VAL(bOK);
4444 8 : oDict.Add(osKey,
4445 : GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
4446 : }
4447 : else
4448 82 : oDict.Add(osKey, GetObjectAsJSON(schema, array, nIdx));
4449 198 : }
4450 :
4451 : /************************************************************************/
4452 : /* GetMapAsJSON() */
4453 : /************************************************************************/
4454 :
4455 243 : static CPLJSONObject GetMapAsJSON(const struct ArrowSchema *schema,
4456 : const struct ArrowArray *array,
4457 : const size_t nIdx)
4458 : {
4459 243 : const auto schemaStruct = schema->children[0];
4460 243 : if (!IsStructure(schemaStruct->format))
4461 : {
4462 0 : CPLError(CE_Failure, CPLE_AppDefined,
4463 : "GetMapAsJSON(): !IsStructure(schemaStruct->format))");
4464 0 : return CPLJSONObject();
4465 : }
4466 243 : const auto schemaKey = schemaStruct->children[0];
4467 243 : const auto schemaValues = schemaStruct->children[1];
4468 243 : if (!IsString(schemaKey->format))
4469 : {
4470 0 : CPLError(CE_Failure, CPLE_AppDefined,
4471 : "GetMapAsJSON(): !IsString(schemaKey->format))");
4472 0 : return CPLJSONObject();
4473 : }
4474 243 : const auto arrayKeys = array->children[0]->children[0];
4475 243 : const auto arrayValues = array->children[0]->children[1];
4476 :
4477 486 : CPLJSONObject oDict;
4478 243 : const auto panOffsets =
4479 243 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset + nIdx;
4480 243 : const uint8_t *pabyValidityKeys =
4481 243 : arrayKeys->null_count == 0
4482 243 : ? nullptr
4483 0 : : static_cast<const uint8_t *>(arrayKeys->buffers[0]);
4484 243 : const uint32_t *panOffsetsKeys =
4485 243 : static_cast<const uint32_t *>(arrayKeys->buffers[1]) +
4486 243 : arrayKeys->offset;
4487 243 : const char *pabyKeys = static_cast<const char *>(arrayKeys->buffers[2]);
4488 243 : const uint8_t *pabyValidityValues =
4489 243 : arrayValues->null_count == 0
4490 243 : ? nullptr
4491 237 : : static_cast<const uint8_t *>(arrayValues->buffers[0]);
4492 463 : for (uint32_t k = panOffsets[0]; k < panOffsets[1]; k++)
4493 : {
4494 220 : if (!pabyValidityKeys ||
4495 0 : TestBit(pabyValidityKeys,
4496 0 : k + static_cast<size_t>(arrayKeys->offset)))
4497 : {
4498 440 : std::string osKey;
4499 220 : osKey.assign(pabyKeys + panOffsetsKeys[k],
4500 220 : panOffsetsKeys[k + 1] - panOffsetsKeys[k]);
4501 :
4502 433 : if (!pabyValidityValues ||
4503 213 : TestBit(pabyValidityValues,
4504 213 : k + static_cast<size_t>(arrayValues->offset)))
4505 : {
4506 168 : AddToDict(oDict, osKey, schemaValues, arrayValues, k);
4507 : }
4508 : else
4509 : {
4510 52 : oDict.AddNull(osKey);
4511 : }
4512 : }
4513 : }
4514 243 : return oDict;
4515 : }
4516 :
4517 : /************************************************************************/
4518 : /* GetStructureAsJSON() */
4519 : /************************************************************************/
4520 :
4521 16 : static CPLJSONObject GetStructureAsJSON(const struct ArrowSchema *schema,
4522 : const struct ArrowArray *array,
4523 : const size_t nIdx)
4524 : {
4525 16 : CPLJSONObject oDict;
4526 62 : for (int64_t k = 0; k < array->n_children; k++)
4527 : {
4528 46 : const uint8_t *pabyValidityValues =
4529 46 : array->children[k]->null_count == 0
4530 46 : ? nullptr
4531 36 : : static_cast<const uint8_t *>(array->children[k]->buffers[0]);
4532 82 : if (!pabyValidityValues ||
4533 36 : TestBit(pabyValidityValues,
4534 36 : nIdx + static_cast<size_t>(array->children[k]->offset)))
4535 : {
4536 30 : AddToDict(oDict, schema->children[k]->name, schema->children[k],
4537 30 : array->children[k], nIdx);
4538 : }
4539 : else
4540 : {
4541 16 : oDict.AddNull(schema->children[k]->name);
4542 : }
4543 : }
4544 16 : return oDict;
4545 : }
4546 :
4547 : /************************************************************************/
4548 : /* GetObjectAsJSON() */
4549 : /************************************************************************/
4550 :
4551 104 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4552 : const struct ArrowArray *array,
4553 : const size_t nIdx)
4554 : {
4555 104 : if (IsMap(schema->format))
4556 4 : return GetMapAsJSON(schema, array, nIdx);
4557 100 : else if (IsList(schema->format))
4558 156 : return GetListAsJSON<uint32_t>(schema, array, nIdx);
4559 22 : else if (IsLargeList(schema->format))
4560 6 : return GetListAsJSON<uint64_t>(schema, array, nIdx);
4561 19 : else if (IsFixedSizeList(schema->format))
4562 6 : return GetFixedSizeListAsJSON(schema, array, nIdx);
4563 16 : else if (IsStructure(schema->format))
4564 16 : return GetStructureAsJSON(schema, array, nIdx);
4565 : else
4566 : {
4567 0 : CPLError(CE_Failure, CPLE_AppDefined,
4568 : "GetObjectAsJSON(): unhandled value format: %s",
4569 0 : schema->format);
4570 0 : return CPLJSONObject();
4571 : }
4572 : }
4573 :
4574 : /************************************************************************/
4575 : /* SetFieldForOtherFormats() */
4576 : /************************************************************************/
4577 :
4578 856 : static bool SetFieldForOtherFormats(OGRFeature &oFeature,
4579 : const int iOGRFieldIndex,
4580 : const size_t nOffsettedIndex,
4581 : const struct ArrowSchema *schema,
4582 : const struct ArrowArray *array)
4583 : {
4584 856 : const char *format = schema->format;
4585 856 : if (IsFloat16(format))
4586 : {
4587 4 : oFeature.SetField(
4588 : iOGRFieldIndex,
4589 4 : static_cast<double>(GetValueFloat16(
4590 4 : array, nOffsettedIndex - static_cast<size_t>(array->offset))));
4591 : }
4592 :
4593 852 : else if (IsFixedWidthBinary(format))
4594 : {
4595 : // Fixed width binary
4596 17 : const int nWidth = GetFixedWithBinary(format);
4597 17 : oFeature.SetField(iOGRFieldIndex, nWidth,
4598 17 : static_cast<const GByte *>(array->buffers[1]) +
4599 17 : nOffsettedIndex * nWidth);
4600 : }
4601 835 : else if (format[0] == 't' && format[1] == 'd' &&
4602 38 : format[2] == 'D') // strcmp(format, "tdD") == 0
4603 : {
4604 : // date32[days]
4605 : // number of days since Epoch
4606 33 : int64_t timestamp = static_cast<int64_t>(static_cast<const int32_t *>(
4607 33 : array->buffers[1])[nOffsettedIndex]) *
4608 : 3600 * 24;
4609 : struct tm dt;
4610 33 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4611 33 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4612 : dt.tm_mday, 0, 0, 0);
4613 33 : return true;
4614 : }
4615 802 : else if (format[0] == 't' && format[1] == 'd' &&
4616 5 : format[2] == 'm') // strcmp(format, "tdm") == 0
4617 : {
4618 : // date64[milliseconds]
4619 : // number of milliseconds since Epoch
4620 5 : int64_t timestamp =
4621 5 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex] /
4622 : 1000;
4623 : struct tm dt;
4624 5 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4625 5 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4626 5 : dt.tm_mday, 0, 0, 0);
4627 : }
4628 797 : else if (format[0] == 't' && format[1] == 't' &&
4629 39 : format[2] == 's') // strcmp(format, "tts") == 0
4630 : {
4631 : // time32 [seconds]
4632 0 : int32_t value =
4633 0 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4634 0 : const int nHour = value / 3600;
4635 0 : const int nMinute = (value / 60) % 60;
4636 0 : const int nSecond = value % 60;
4637 0 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4638 0 : static_cast<float>(nSecond));
4639 : }
4640 797 : else if (format[0] == 't' && format[1] == 't' &&
4641 39 : format[2] == 'm') // strcmp(format, "ttm") == 0
4642 : {
4643 : // time32 [milliseconds]
4644 25 : int32_t value =
4645 25 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4646 25 : double floatingPart = (value % 1000) / 1e3;
4647 25 : value /= 1000;
4648 25 : const int nHour = value / 3600;
4649 25 : const int nMinute = (value / 60) % 60;
4650 25 : const int nSecond = value % 60;
4651 25 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4652 25 : static_cast<float>(nSecond + floatingPart));
4653 : }
4654 772 : else if (format[0] == 't' && format[1] == 't' &&
4655 14 : (format[2] == 'u' || // time64 [microseconds]
4656 7 : format[2] == 'n')) // time64 [nanoseconds]
4657 : {
4658 14 : int64_t value =
4659 14 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex];
4660 14 : if (oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() == OFTInteger64)
4661 : {
4662 2 : oFeature.SetField(iOGRFieldIndex, static_cast<GIntBig>(value));
4663 : }
4664 : else
4665 : {
4666 : double floatingPart;
4667 12 : if (format[2] == 'u')
4668 : {
4669 5 : floatingPart = (value % (1000 * 1000)) / 1e6;
4670 5 : value /= 1000 * 1000;
4671 : }
4672 : else
4673 : {
4674 7 : floatingPart = (value % (1000 * 1000 * 1000)) / 1e9;
4675 7 : value /= 1000 * 1000 * 1000;
4676 : }
4677 12 : const int nHour = static_cast<int>(value / 3600);
4678 12 : const int nMinute = static_cast<int>((value / 60) % 60);
4679 12 : const int nSecond = static_cast<int>(value % 60);
4680 12 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4681 12 : static_cast<float>(nSecond + floatingPart));
4682 14 : }
4683 : }
4684 758 : else if (IsTimestampSeconds(format))
4685 : {
4686 0 : ArrowTimestampToOGRDateTime(
4687 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex], 1,
4688 : GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4689 : }
4690 758 : else if (IsTimestampMilliseconds(format))
4691 : {
4692 73 : ArrowTimestampToOGRDateTime(
4693 73 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4694 : 1000, GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4695 : }
4696 685 : else if (IsTimestampMicroseconds(format))
4697 : {
4698 34 : ArrowTimestampToOGRDateTime(
4699 34 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4700 : 1000 * 1000, GetTimestampTimezone(format), oFeature,
4701 : iOGRFieldIndex);
4702 : }
4703 651 : else if (IsTimestampNanoseconds(format))
4704 : {
4705 0 : ArrowTimestampToOGRDateTime(
4706 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4707 : 1000 * 1000 * 1000, GetTimestampTimezone(format), oFeature,
4708 : iOGRFieldIndex);
4709 : }
4710 651 : else if (IsFixedSizeList(format))
4711 : {
4712 154 : const int nItems = GetFixedSizeList(format);
4713 154 : const auto childArray = array->children[0];
4714 154 : const char *childFormat = schema->children[0]->format;
4715 154 : if (IsBoolean(childFormat))
4716 : {
4717 24 : std::vector<int> aValues;
4718 12 : const auto *paValues =
4719 12 : static_cast<const uint8_t *>(childArray->buffers[1]);
4720 36 : for (int i = 0; i < nItems; ++i)
4721 : {
4722 24 : aValues.push_back(
4723 24 : TestBit(paValues,
4724 24 : static_cast<size_t>(childArray->offset +
4725 24 : nOffsettedIndex * nItems + i))
4726 24 : ? 1
4727 : : 0);
4728 : }
4729 12 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4730 12 : aValues.data());
4731 : }
4732 142 : else if (IsInt8(childFormat))
4733 : {
4734 12 : FillFieldFixedSizeList<int8_t, int>(array, iOGRFieldIndex,
4735 : nOffsettedIndex, nItems,
4736 : childArray, oFeature);
4737 : }
4738 130 : else if (IsUInt8(childFormat))
4739 : {
4740 12 : FillFieldFixedSizeList<uint8_t, int>(array, iOGRFieldIndex,
4741 : nOffsettedIndex, nItems,
4742 : childArray, oFeature);
4743 : }
4744 118 : else if (IsInt16(childFormat))
4745 : {
4746 12 : FillFieldFixedSizeList<int16_t, int>(array, iOGRFieldIndex,
4747 : nOffsettedIndex, nItems,
4748 : childArray, oFeature);
4749 : }
4750 106 : else if (IsUInt16(childFormat))
4751 : {
4752 12 : FillFieldFixedSizeList<uint16_t, int>(array, iOGRFieldIndex,
4753 : nOffsettedIndex, nItems,
4754 : childArray, oFeature);
4755 : }
4756 94 : else if (IsInt32(childFormat))
4757 : {
4758 12 : FillFieldFixedSizeList<int32_t, int>(array, iOGRFieldIndex,
4759 : nOffsettedIndex, nItems,
4760 : childArray, oFeature);
4761 : }
4762 82 : else if (IsUInt32(childFormat))
4763 : {
4764 5 : FillFieldFixedSizeList<uint32_t, GIntBig>(array, iOGRFieldIndex,
4765 : nOffsettedIndex, nItems,
4766 : childArray, oFeature);
4767 : }
4768 77 : else if (IsInt64(childFormat))
4769 : {
4770 19 : FillFieldFixedSizeList<int64_t, GIntBig>(array, iOGRFieldIndex,
4771 : nOffsettedIndex, nItems,
4772 : childArray, oFeature);
4773 : }
4774 58 : else if (IsUInt64(childFormat))
4775 : {
4776 12 : FillFieldFixedSizeList<uint64_t, double>(array, iOGRFieldIndex,
4777 : nOffsettedIndex, nItems,
4778 : childArray, oFeature);
4779 : }
4780 46 : else if (IsFloat16(childFormat))
4781 : {
4782 10 : std::vector<double> aValues;
4783 15 : for (int i = 0; i < nItems; ++i)
4784 : {
4785 10 : aValues.push_back(static_cast<double>(
4786 10 : GetValueFloat16(childArray, nOffsettedIndex * nItems + i)));
4787 : }
4788 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4789 5 : aValues.data());
4790 : }
4791 41 : else if (IsFloat32(childFormat))
4792 : {
4793 12 : FillFieldFixedSizeList<float, double>(array, iOGRFieldIndex,
4794 : nOffsettedIndex, nItems,
4795 : childArray, oFeature);
4796 : }
4797 29 : else if (IsFloat64(childFormat))
4798 : {
4799 12 : FillFieldFixedSizeList<double, double>(array, iOGRFieldIndex,
4800 : nOffsettedIndex, nItems,
4801 : childArray, oFeature);
4802 : }
4803 17 : else if (IsString(childFormat))
4804 : {
4805 12 : FillFieldFixedSizeListString<uint32_t>(array, iOGRFieldIndex,
4806 : nOffsettedIndex, nItems,
4807 : childArray, oFeature);
4808 : }
4809 5 : else if (IsLargeString(childFormat))
4810 : {
4811 5 : FillFieldFixedSizeListString<uint64_t>(array, iOGRFieldIndex,
4812 : nOffsettedIndex, nItems,
4813 : childArray, oFeature);
4814 : }
4815 : }
4816 497 : else if (IsList(format) || IsLargeList(format))
4817 : {
4818 254 : const auto childArray = array->children[0];
4819 254 : const char *childFormat = schema->children[0]->format;
4820 254 : if (IsBoolean(childFormat))
4821 : {
4822 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4823 12 : FillFieldListFromBool<uint32_t>(array, iOGRFieldIndex,
4824 : nOffsettedIndex, childArray,
4825 : oFeature);
4826 : else
4827 4 : FillFieldListFromBool<uint64_t>(array, iOGRFieldIndex,
4828 : nOffsettedIndex, childArray,
4829 : oFeature);
4830 : }
4831 238 : else if (IsInt8(childFormat))
4832 : {
4833 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4834 10 : FillFieldList<uint32_t, int8_t, int>(array, iOGRFieldIndex,
4835 : nOffsettedIndex,
4836 : childArray, oFeature);
4837 : else
4838 4 : FillFieldList<uint64_t, int8_t, int>(array, iOGRFieldIndex,
4839 : nOffsettedIndex,
4840 : childArray, oFeature);
4841 : }
4842 224 : else if (IsUInt8(childFormat))
4843 : {
4844 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4845 14 : FillFieldList<uint32_t, uint8_t, int>(array, iOGRFieldIndex,
4846 : nOffsettedIndex,
4847 : childArray, oFeature);
4848 : else
4849 4 : FillFieldList<uint64_t, uint8_t, int>(array, iOGRFieldIndex,
4850 : nOffsettedIndex,
4851 : childArray, oFeature);
4852 : }
4853 206 : else if (IsInt16(childFormat))
4854 : {
4855 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4856 12 : FillFieldList<uint32_t, int16_t, int>(array, iOGRFieldIndex,
4857 : nOffsettedIndex,
4858 : childArray, oFeature);
4859 : else
4860 4 : FillFieldList<uint64_t, int16_t, int>(array, iOGRFieldIndex,
4861 : nOffsettedIndex,
4862 : childArray, oFeature);
4863 : }
4864 190 : else if (IsUInt16(childFormat))
4865 : {
4866 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4867 10 : FillFieldList<uint32_t, uint16_t, int>(array, iOGRFieldIndex,
4868 : nOffsettedIndex,
4869 : childArray, oFeature);
4870 : else
4871 4 : FillFieldList<uint64_t, uint16_t, int>(array, iOGRFieldIndex,
4872 : nOffsettedIndex,
4873 : childArray, oFeature);
4874 : }
4875 176 : else if (IsInt32(childFormat))
4876 : {
4877 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4878 14 : FillFieldList<uint32_t, int32_t, int>(array, iOGRFieldIndex,
4879 : nOffsettedIndex,
4880 : childArray, oFeature);
4881 : else
4882 4 : FillFieldList<uint64_t, int32_t, int>(array, iOGRFieldIndex,
4883 : nOffsettedIndex,
4884 : childArray, oFeature);
4885 : }
4886 158 : else if (IsUInt32(childFormat))
4887 : {
4888 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
4889 4 : FillFieldList<uint32_t, uint32_t, GIntBig>(
4890 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4891 : oFeature);
4892 : else
4893 4 : FillFieldList<uint64_t, uint32_t, GIntBig>(
4894 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4895 : oFeature);
4896 : }
4897 150 : else if (IsInt64(childFormat))
4898 : {
4899 31 : if (format[1] == ARROW_2ND_LETTER_LIST)
4900 27 : FillFieldList<uint32_t, int64_t, GIntBig>(array, iOGRFieldIndex,
4901 : nOffsettedIndex,
4902 : childArray, oFeature);
4903 : else
4904 4 : FillFieldList<uint64_t, int64_t, GIntBig>(array, iOGRFieldIndex,
4905 : nOffsettedIndex,
4906 : childArray, oFeature);
4907 : }
4908 119 : else if (IsUInt64(childFormat)) // (lossy conversion)
4909 : {
4910 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4911 10 : FillFieldList<uint32_t, uint64_t, double>(array, iOGRFieldIndex,
4912 : nOffsettedIndex,
4913 : childArray, oFeature);
4914 : else
4915 4 : FillFieldList<uint64_t, uint64_t, double>(array, iOGRFieldIndex,
4916 : nOffsettedIndex,
4917 : childArray, oFeature);
4918 : }
4919 105 : else if (IsFloat16(childFormat))
4920 : {
4921 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
4922 4 : FillFieldListFromHalfFloat<uint32_t>(array, iOGRFieldIndex,
4923 : nOffsettedIndex,
4924 : childArray, oFeature);
4925 : else
4926 4 : FillFieldListFromHalfFloat<uint64_t>(array, iOGRFieldIndex,
4927 : nOffsettedIndex,
4928 : childArray, oFeature);
4929 : }
4930 97 : else if (IsFloat32(childFormat))
4931 : {
4932 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4933 12 : FillFieldList<uint32_t, float, double>(array, iOGRFieldIndex,
4934 : nOffsettedIndex,
4935 : childArray, oFeature);
4936 : else
4937 4 : FillFieldList<uint64_t, float, double>(array, iOGRFieldIndex,
4938 : nOffsettedIndex,
4939 : childArray, oFeature);
4940 : }
4941 81 : else if (IsFloat64(childFormat))
4942 : {
4943 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4944 14 : FillFieldList<uint32_t, double, double>(array, iOGRFieldIndex,
4945 : nOffsettedIndex,
4946 : childArray, oFeature);
4947 : else
4948 4 : FillFieldList<uint64_t, double, double>(array, iOGRFieldIndex,
4949 : nOffsettedIndex,
4950 : childArray, oFeature);
4951 : }
4952 63 : else if (IsString(childFormat))
4953 : {
4954 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4955 14 : FillFieldListFromString<uint32_t, uint32_t>(
4956 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4957 : oFeature);
4958 : else
4959 4 : FillFieldListFromString<uint64_t, uint32_t>(
4960 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4961 : oFeature);
4962 : }
4963 45 : else if (IsLargeString(childFormat))
4964 : {
4965 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4966 10 : FillFieldListFromString<uint32_t, uint64_t>(
4967 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4968 : oFeature);
4969 : else
4970 4 : FillFieldListFromString<uint64_t, uint64_t>(
4971 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4972 : oFeature);
4973 : }
4974 31 : else if (format[1] == ARROW_2ND_LETTER_LIST)
4975 : {
4976 31 : const size_t iFeature =
4977 31 : static_cast<size_t>(nOffsettedIndex - array->offset);
4978 31 : oFeature.SetField(iOGRFieldIndex,
4979 62 : GetListAsJSON<uint32_t>(schema, array, iFeature)
4980 62 : .Format(CPLJSONObject::PrettyFormat::Plain)
4981 : .c_str());
4982 : }
4983 : else
4984 : {
4985 0 : const size_t iFeature =
4986 0 : static_cast<size_t>(nOffsettedIndex - array->offset);
4987 0 : oFeature.SetField(iOGRFieldIndex,
4988 0 : GetListAsJSON<uint64_t>(schema, array, iFeature)
4989 0 : .Format(CPLJSONObject::PrettyFormat::Plain)
4990 : .c_str());
4991 : }
4992 : }
4993 243 : else if (IsDecimal(format))
4994 : {
4995 4 : int nPrecision = 0;
4996 4 : int nScale = 0;
4997 4 : int nWidthInBytes = 0;
4998 4 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
4999 : {
5000 0 : CPLAssert(false);
5001 : }
5002 :
5003 : // fits on a int64
5004 4 : CPLAssert(nPrecision <= 19);
5005 : // either 128 or 256 bits
5006 4 : CPLAssert((nWidthInBytes % 8) == 0);
5007 4 : const int nWidthIn64BitWord = nWidthInBytes / 8;
5008 4 : const size_t iFeature =
5009 4 : static_cast<size_t>(nOffsettedIndex - array->offset);
5010 4 : oFeature.SetField(
5011 : iOGRFieldIndex,
5012 : GetValueDecimal(array, nWidthIn64BitWord, nScale, iFeature));
5013 4 : return true;
5014 : }
5015 239 : else if (IsMap(format))
5016 : {
5017 239 : const size_t iFeature =
5018 239 : static_cast<size_t>(nOffsettedIndex - array->offset);
5019 239 : oFeature.SetField(iOGRFieldIndex,
5020 478 : GetMapAsJSON(schema, array, iFeature)
5021 478 : .Format(CPLJSONObject::PrettyFormat::Plain)
5022 : .c_str());
5023 : }
5024 : else
5025 : {
5026 0 : return false;
5027 : }
5028 :
5029 819 : return true;
5030 : }
5031 :
5032 : /************************************************************************/
5033 : /* FillValidityArrayFromAttrQuery() */
5034 : /************************************************************************/
5035 :
5036 134 : static size_t FillValidityArrayFromAttrQuery(
5037 : const OGRLayer *poLayer, OGRFeatureQuery *poAttrQuery,
5038 : const struct ArrowSchema *schema, struct ArrowArray *array,
5039 : std::vector<bool> &abyValidityFromFilters, CSLConstList papszOptions)
5040 : {
5041 134 : size_t nCountIntersecting = 0;
5042 134 : auto poFeatureDefn = const_cast<OGRLayer *>(poLayer)->GetLayerDefn();
5043 268 : OGRFeature oFeature(poFeatureDefn);
5044 :
5045 268 : std::map<std::string, std::vector<int>> oMapFieldNameToArrowPath;
5046 268 : std::vector<int> anArrowPathTmp;
5047 134 : BuildMapFieldNameToArrowPath(schema, oMapFieldNameToArrowPath,
5048 268 : std::string(), anArrowPathTmp);
5049 :
5050 : struct UsedFieldsInfo
5051 : {
5052 : int iOGRFieldIndex{};
5053 : std::vector<int> anArrowPath{};
5054 : };
5055 :
5056 268 : std::vector<UsedFieldsInfo> aoUsedFieldsInfo;
5057 :
5058 134 : bool bNeedsFID = false;
5059 268 : const CPLStringList aosUsedFields(poAttrQuery->GetUsedFields());
5060 252 : for (int i = 0; i < aosUsedFields.size(); ++i)
5061 : {
5062 118 : int iOGRFieldIndex = poFeatureDefn->GetFieldIndex(aosUsedFields[i]);
5063 118 : if (iOGRFieldIndex >= 0)
5064 : {
5065 112 : const auto oIter = oMapFieldNameToArrowPath.find(aosUsedFields[i]);
5066 112 : if (oIter != oMapFieldNameToArrowPath.end())
5067 : {
5068 224 : UsedFieldsInfo info;
5069 112 : info.iOGRFieldIndex = iOGRFieldIndex;
5070 112 : info.anArrowPath = oIter->second;
5071 112 : aoUsedFieldsInfo.push_back(std::move(info));
5072 : }
5073 : else
5074 : {
5075 0 : CPLError(CE_Failure, CPLE_AppDefined,
5076 : "Cannot find %s in oMapFieldNameToArrowPath",
5077 : aosUsedFields[i]);
5078 : }
5079 : }
5080 6 : else if (EQUAL(aosUsedFields[i], "FID"))
5081 : {
5082 6 : bNeedsFID = true;
5083 : }
5084 : else
5085 : {
5086 0 : CPLDebug("OGR", "Cannot find used field %s", aosUsedFields[i]);
5087 : }
5088 : }
5089 :
5090 134 : const size_t nLength = abyValidityFromFilters.size();
5091 :
5092 134 : GIntBig nBaseSeqFID = -1;
5093 268 : std::vector<int> anArrowPathToFIDColumn;
5094 134 : if (bNeedsFID)
5095 : {
5096 : // BASE_SEQUENTIAL_FID is set when there is no Arrow column for the FID
5097 : // and we assume sequential FID numbering
5098 : const char *pszBaseSeqFID =
5099 6 : CSLFetchNameValue(papszOptions, "BASE_SEQUENTIAL_FID");
5100 6 : if (pszBaseSeqFID)
5101 : {
5102 5 : nBaseSeqFID = CPLAtoGIntBig(pszBaseSeqFID);
5103 :
5104 : // Optimization for "FID = constant"
5105 : swq_expr_node *poNode =
5106 5 : static_cast<swq_expr_node *>(poAttrQuery->GetSWQExpr());
5107 15 : if (poNode->eNodeType == SNT_OPERATION &&
5108 5 : poNode->nOperation == SWQ_EQ && poNode->nSubExprCount == 2 &&
5109 2 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
5110 2 : poNode->papoSubExpr[1]->eNodeType == SNT_CONSTANT &&
5111 2 : poNode->papoSubExpr[0]->field_index ==
5112 12 : poFeatureDefn->GetFieldCount() + SPF_FID &&
5113 2 : poNode->papoSubExpr[1]->field_type == SWQ_INTEGER64)
5114 : {
5115 2 : if (nBaseSeqFID + static_cast<int64_t>(nLength) <
5116 2 : poNode->papoSubExpr[1]->int_value ||
5117 2 : nBaseSeqFID > poNode->papoSubExpr[1]->int_value)
5118 : {
5119 0 : return 0;
5120 : }
5121 : }
5122 : }
5123 : else
5124 : {
5125 : const char *pszFIDColumn =
5126 1 : const_cast<OGRLayer *>(poLayer)->GetFIDColumn();
5127 1 : if (pszFIDColumn && pszFIDColumn[0])
5128 : {
5129 1 : const auto oIter = oMapFieldNameToArrowPath.find(pszFIDColumn);
5130 1 : if (oIter != oMapFieldNameToArrowPath.end())
5131 : {
5132 1 : anArrowPathToFIDColumn = oIter->second;
5133 : }
5134 : }
5135 1 : if (anArrowPathToFIDColumn.empty())
5136 : {
5137 0 : CPLError(CE_Failure, CPLE_AppDefined,
5138 : "Filtering on FID requested but cannot associate a "
5139 : "FID with Arrow records");
5140 : }
5141 : }
5142 : }
5143 :
5144 555 : for (size_t iRow = 0; iRow < nLength; ++iRow)
5145 : {
5146 421 : if (!abyValidityFromFilters[iRow])
5147 2 : continue;
5148 :
5149 419 : if (bNeedsFID)
5150 : {
5151 21 : if (nBaseSeqFID >= 0)
5152 : {
5153 11 : oFeature.SetFID(nBaseSeqFID + iRow);
5154 : }
5155 10 : else if (!anArrowPathToFIDColumn.empty())
5156 : {
5157 10 : oFeature.SetFID(OGRNullFID);
5158 :
5159 10 : const struct ArrowSchema *psSchemaField = schema;
5160 10 : const struct ArrowArray *psArray = array;
5161 10 : bool bSkip = false;
5162 20 : for (size_t i = 0; i < anArrowPathToFIDColumn.size(); ++i)
5163 : {
5164 10 : const int iChild = anArrowPathToFIDColumn[i];
5165 10 : if (i > 0)
5166 : {
5167 0 : const uint8_t *pabyValidity =
5168 0 : psArray->null_count == 0
5169 0 : ? nullptr
5170 : : static_cast<uint8_t *>(
5171 0 : const_cast<void *>(psArray->buffers[0]));
5172 0 : const size_t nOffsettedIndex =
5173 0 : static_cast<size_t>(iRow + psArray->offset);
5174 0 : if (pabyValidity &&
5175 0 : !TestBit(pabyValidity, nOffsettedIndex))
5176 : {
5177 0 : bSkip = true;
5178 0 : break;
5179 : }
5180 : }
5181 :
5182 10 : psSchemaField = psSchemaField->children[iChild];
5183 10 : psArray = psArray->children[iChild];
5184 : }
5185 10 : if (bSkip)
5186 0 : continue;
5187 :
5188 10 : const char *format = psSchemaField->format;
5189 10 : const uint8_t *pabyValidity =
5190 10 : psArray->null_count == 0
5191 10 : ? nullptr
5192 : : static_cast<uint8_t *>(
5193 0 : const_cast<void *>(psArray->buffers[0]));
5194 10 : const size_t nOffsettedIndex =
5195 10 : static_cast<size_t>(iRow + psArray->offset);
5196 10 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5197 : {
5198 : // do nothing
5199 : }
5200 10 : else if (IsInt32(format))
5201 : {
5202 0 : oFeature.SetFID(static_cast<const int32_t *>(
5203 0 : psArray->buffers[1])[nOffsettedIndex]);
5204 : }
5205 10 : else if (IsInt64(format))
5206 : {
5207 10 : oFeature.SetFID(static_cast<const int64_t *>(
5208 10 : psArray->buffers[1])[nOffsettedIndex]);
5209 : }
5210 : }
5211 : }
5212 :
5213 725 : for (const auto &sInfo : aoUsedFieldsInfo)
5214 : {
5215 306 : const int iOGRFieldIndex = sInfo.iOGRFieldIndex;
5216 306 : const struct ArrowSchema *psSchemaField = schema;
5217 306 : const struct ArrowArray *psArray = array;
5218 306 : bool bSkip = false;
5219 612 : for (size_t i = 0; i < sInfo.anArrowPath.size(); ++i)
5220 : {
5221 306 : const int iChild = sInfo.anArrowPath[i];
5222 306 : if (i > 0)
5223 : {
5224 0 : const uint8_t *pabyValidity =
5225 0 : psArray->null_count == 0
5226 0 : ? nullptr
5227 : : static_cast<uint8_t *>(
5228 0 : const_cast<void *>(psArray->buffers[0]));
5229 0 : const size_t nOffsettedIndex =
5230 0 : static_cast<size_t>(iRow + psArray->offset);
5231 0 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5232 : {
5233 0 : bSkip = true;
5234 0 : oFeature.SetFieldNull(iOGRFieldIndex);
5235 0 : break;
5236 : }
5237 : }
5238 :
5239 306 : psSchemaField = psSchemaField->children[iChild];
5240 306 : psArray = psArray->children[iChild];
5241 : }
5242 306 : if (bSkip)
5243 0 : continue;
5244 :
5245 306 : const char *format = psSchemaField->format;
5246 306 : const uint8_t *pabyValidity =
5247 306 : psArray->null_count == 0
5248 306 : ? nullptr
5249 : : static_cast<uint8_t *>(
5250 129 : const_cast<void *>(psArray->buffers[0]));
5251 306 : const size_t nOffsettedIndex =
5252 306 : static_cast<size_t>(iRow + psArray->offset);
5253 306 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5254 : {
5255 38 : oFeature.SetFieldNull(iOGRFieldIndex);
5256 : }
5257 268 : else if (IsBoolean(format))
5258 : {
5259 78 : oFeature.SetField(
5260 : iOGRFieldIndex,
5261 78 : TestBit(static_cast<const uint8_t *>(psArray->buffers[1]),
5262 : nOffsettedIndex));
5263 : }
5264 190 : else if (IsInt8(format))
5265 : {
5266 8 : oFeature.SetField(iOGRFieldIndex,
5267 8 : static_cast<const int8_t *>(
5268 8 : psArray->buffers[1])[nOffsettedIndex]);
5269 : }
5270 182 : else if (IsUInt8(format))
5271 : {
5272 4 : oFeature.SetField(iOGRFieldIndex,
5273 4 : static_cast<const uint8_t *>(
5274 4 : psArray->buffers[1])[nOffsettedIndex]);
5275 : }
5276 178 : else if (IsInt16(format))
5277 : {
5278 16 : oFeature.SetField(iOGRFieldIndex,
5279 16 : static_cast<const int16_t *>(
5280 16 : psArray->buffers[1])[nOffsettedIndex]);
5281 : }
5282 162 : else if (IsUInt16(format))
5283 : {
5284 2 : oFeature.SetField(iOGRFieldIndex,
5285 2 : static_cast<const uint16_t *>(
5286 2 : psArray->buffers[1])[nOffsettedIndex]);
5287 : }
5288 160 : else if (IsInt32(format))
5289 : {
5290 10 : oFeature.SetField(iOGRFieldIndex,
5291 10 : static_cast<const int32_t *>(
5292 10 : psArray->buffers[1])[nOffsettedIndex]);
5293 : }
5294 150 : else if (IsUInt32(format))
5295 : {
5296 0 : oFeature.SetField(
5297 : iOGRFieldIndex,
5298 0 : static_cast<GIntBig>(static_cast<const uint32_t *>(
5299 0 : psArray->buffers[1])[nOffsettedIndex]));
5300 : }
5301 150 : else if (IsInt64(format))
5302 : {
5303 4 : oFeature.SetField(
5304 : iOGRFieldIndex,
5305 4 : static_cast<GIntBig>(static_cast<const int64_t *>(
5306 4 : psArray->buffers[1])[nOffsettedIndex]));
5307 : }
5308 146 : else if (IsUInt64(format))
5309 : {
5310 4 : oFeature.SetField(
5311 : iOGRFieldIndex,
5312 4 : static_cast<double>(static_cast<const uint64_t *>(
5313 4 : psArray->buffers[1])[nOffsettedIndex]));
5314 : }
5315 142 : else if (IsFloat32(format))
5316 : {
5317 2 : oFeature.SetField(
5318 : iOGRFieldIndex,
5319 2 : static_cast<double>(static_cast<const float *>(
5320 2 : psArray->buffers[1])[nOffsettedIndex]));
5321 : }
5322 140 : else if (IsFloat64(format))
5323 : {
5324 26 : oFeature.SetField(iOGRFieldIndex,
5325 26 : static_cast<const double *>(
5326 26 : psArray->buffers[1])[nOffsettedIndex]);
5327 : }
5328 114 : else if (IsString(format))
5329 : {
5330 18 : const auto nOffset = static_cast<const uint32_t *>(
5331 18 : psArray->buffers[1])[nOffsettedIndex];
5332 18 : const auto nNextOffset = static_cast<const uint32_t *>(
5333 18 : psArray->buffers[1])[nOffsettedIndex + 1];
5334 18 : const GByte *pabyData =
5335 18 : static_cast<const GByte *>(psArray->buffers[2]);
5336 18 : const uint32_t nSize = nNextOffset - nOffset;
5337 18 : CPLAssert(oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() ==
5338 : OFTString);
5339 18 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5340 18 : memcpy(pszStr, pabyData + nOffset, nSize);
5341 18 : pszStr[nSize] = 0;
5342 18 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5343 18 : if (IsValidField(psField))
5344 12 : CPLFree(psField->String);
5345 18 : psField->String = pszStr;
5346 : }
5347 96 : else if (IsLargeString(format))
5348 : {
5349 6 : const auto nOffset = static_cast<const uint64_t *>(
5350 6 : psArray->buffers[1])[nOffsettedIndex];
5351 6 : const auto nNextOffset = static_cast<const uint64_t *>(
5352 6 : psArray->buffers[1])[nOffsettedIndex + 1];
5353 6 : const GByte *pabyData =
5354 6 : static_cast<const GByte *>(psArray->buffers[2]);
5355 6 : const size_t nSize = static_cast<size_t>(nNextOffset - nOffset);
5356 6 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5357 6 : memcpy(pszStr, pabyData + static_cast<size_t>(nOffset), nSize);
5358 6 : pszStr[nSize] = 0;
5359 6 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5360 6 : if (IsValidField(psField))
5361 3 : CPLFree(psField->String);
5362 6 : psField->String = pszStr;
5363 : }
5364 90 : else if (IsBinary(format))
5365 : {
5366 5 : const auto nOffset = static_cast<const uint32_t *>(
5367 5 : psArray->buffers[1])[nOffsettedIndex];
5368 5 : const auto nNextOffset = static_cast<const uint32_t *>(
5369 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5370 5 : const GByte *pabyData =
5371 5 : static_cast<const GByte *>(psArray->buffers[2]);
5372 5 : const uint32_t nSize = nNextOffset - nOffset;
5373 10 : if (nSize >
5374 5 : static_cast<size_t>(std::numeric_limits<int32_t>::max()))
5375 : {
5376 0 : abyValidityFromFilters.clear();
5377 0 : abyValidityFromFilters.resize(nLength);
5378 0 : CPLError(CE_Failure, CPLE_AppDefined,
5379 : "Unexpected error in PostFilterArrowArray(): too "
5380 : "large binary");
5381 0 : return 0;
5382 : }
5383 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5384 5 : pabyData + nOffset);
5385 : }
5386 85 : else if (IsLargeBinary(format))
5387 : {
5388 5 : const auto nOffset = static_cast<const uint64_t *>(
5389 5 : psArray->buffers[1])[nOffsettedIndex];
5390 5 : const auto nNextOffset = static_cast<const uint64_t *>(
5391 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5392 5 : const GByte *pabyData =
5393 5 : static_cast<const GByte *>(psArray->buffers[2]);
5394 5 : const uint64_t nSize = nNextOffset - nOffset;
5395 5 : if (nSize >
5396 5 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
5397 : {
5398 0 : abyValidityFromFilters.clear();
5399 0 : abyValidityFromFilters.resize(nLength);
5400 0 : CPLError(CE_Failure, CPLE_AppDefined,
5401 : "Unexpected error in PostFilterArrowArray(): too "
5402 : "large binary");
5403 0 : return 0;
5404 : }
5405 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5406 5 : pabyData + nOffset);
5407 : }
5408 80 : else if (!SetFieldForOtherFormats(oFeature, iOGRFieldIndex,
5409 : nOffsettedIndex, psSchemaField,
5410 : psArray))
5411 : {
5412 0 : abyValidityFromFilters.clear();
5413 0 : abyValidityFromFilters.resize(nLength);
5414 0 : CPLError(
5415 : CE_Failure, CPLE_AppDefined,
5416 : "Unexpected error in PostFilterArrowArray(): unhandled "
5417 : "field format: %s",
5418 : format);
5419 0 : return 0;
5420 : }
5421 : }
5422 419 : if (poAttrQuery->Evaluate(&oFeature))
5423 : {
5424 215 : nCountIntersecting++;
5425 : }
5426 : else
5427 : {
5428 204 : abyValidityFromFilters[iRow] = false;
5429 : }
5430 : }
5431 134 : return nCountIntersecting;
5432 : }
5433 :
5434 : /************************************************************************/
5435 : /* OGRLayer::PostFilterArrowArray() */
5436 : /************************************************************************/
5437 :
5438 : /** Remove rows that aren't selected by the spatial or attribute filter.
5439 : *
5440 : * Assumes that CanPostFilterArrowArray() has been called and returned true.
5441 : */
5442 153 : void OGRLayer::PostFilterArrowArray(const struct ArrowSchema *schema,
5443 : struct ArrowArray *array,
5444 : CSLConstList papszOptions) const
5445 : {
5446 153 : if (!m_poFilterGeom && !m_poAttrQuery)
5447 43 : return;
5448 :
5449 153 : CPLAssert(schema->n_children == array->n_children);
5450 :
5451 153 : int64_t iGeomField = -1;
5452 153 : if (m_poFilterGeom)
5453 : {
5454 : const char *pszGeomFieldName =
5455 : const_cast<OGRLayer *>(this)
5456 21 : ->GetLayerDefn()
5457 21 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
5458 21 : ->GetNameRef();
5459 837 : for (int64_t iField = 0; iField < schema->n_children; ++iField)
5460 : {
5461 837 : const auto fieldSchema = schema->children[iField];
5462 837 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
5463 : {
5464 21 : iGeomField = iField;
5465 21 : break;
5466 : }
5467 816 : CPLAssert(array->children[iField]->length ==
5468 : array->children[0]->length);
5469 : }
5470 : // Guaranteed if CanPostFilterArrowArray() returned true
5471 21 : CPLAssert(iGeomField >= 0);
5472 21 : CPLAssert(IsBinary(schema->children[iGeomField]->format) ||
5473 : IsLargeBinary(schema->children[iGeomField]->format));
5474 21 : CPLAssert(array->children[iGeomField]->n_buffers == 3);
5475 : }
5476 :
5477 153 : std::vector<bool> abyValidityFromFilters;
5478 153 : const size_t nLength = static_cast<size_t>(array->length);
5479 : const size_t nCountIntersectingGeom =
5480 174 : m_poFilterGeom ? (IsBinary(schema->children[iGeomField]->format)
5481 42 : ? FillValidityArrayFromWKBArray<uint32_t>(
5482 21 : array->children[iGeomField], this,
5483 : abyValidityFromFilters)
5484 0 : : FillValidityArrayFromWKBArray<uint64_t>(
5485 0 : array->children[iGeomField], this,
5486 : abyValidityFromFilters))
5487 153 : : nLength;
5488 153 : if (!m_poFilterGeom)
5489 132 : abyValidityFromFilters.resize(nLength, true);
5490 : const size_t nCountIntersecting =
5491 134 : m_poAttrQuery && nCountIntersectingGeom > 0
5492 306 : ? FillValidityArrayFromAttrQuery(this, m_poAttrQuery, schema, array,
5493 : abyValidityFromFilters,
5494 : papszOptions)
5495 19 : : m_poFilterGeom ? nCountIntersectingGeom
5496 153 : : nLength;
5497 : // Nothing to do ?
5498 153 : if (nCountIntersecting == nLength)
5499 : {
5500 : // CPLDebug("OGR", "All rows match filter");
5501 43 : return;
5502 : }
5503 :
5504 110 : if (nCountIntersecting == 0)
5505 : {
5506 27 : array->length = 0;
5507 : }
5508 83 : else if (!CompactStructArray(schema, array, 0, abyValidityFromFilters,
5509 : nCountIntersecting))
5510 : {
5511 0 : array->release(array);
5512 0 : memset(array, 0, sizeof(*array));
5513 : }
5514 : }
5515 :
5516 : /************************************************************************/
5517 : /* OGRCloneArrowArray */
5518 : /************************************************************************/
5519 :
5520 14093 : static bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5521 : const struct ArrowArray *src_array,
5522 : struct ArrowArray *out_array,
5523 : size_t nParentOffset)
5524 : {
5525 14093 : memset(out_array, 0, sizeof(*out_array));
5526 14093 : const size_t nLength =
5527 14093 : static_cast<size_t>(src_array->length) - nParentOffset;
5528 14093 : out_array->length = nLength;
5529 14093 : out_array->null_count = src_array->null_count;
5530 14093 : out_array->release = OGRLayerDefaultReleaseArray;
5531 :
5532 14093 : bool bRet = true;
5533 :
5534 14093 : out_array->n_buffers = src_array->n_buffers;
5535 28186 : out_array->buffers = static_cast<const void **>(CPLCalloc(
5536 14093 : static_cast<size_t>(src_array->n_buffers), sizeof(const void *)));
5537 14093 : CPLAssert(static_cast<size_t>(src_array->length) >= nParentOffset);
5538 14093 : const char *format = schema->format;
5539 14093 : const auto nOffset = static_cast<size_t>(src_array->offset) + nParentOffset;
5540 41917 : for (int64_t i = 0; i < src_array->n_buffers; ++i)
5541 : {
5542 27824 : if (i == 0 || IsBoolean(format))
5543 : {
5544 14464 : if (i == 1)
5545 : {
5546 371 : CPLAssert(src_array->buffers[i]);
5547 : }
5548 14464 : if (src_array->buffers[i])
5549 : {
5550 8911 : const size_t nBytes = nLength ? (nLength + 7) / 8 : 1;
5551 : uint8_t *CPL_RESTRICT p = static_cast<uint8_t *>(
5552 8911 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nBytes));
5553 8911 : if (!p)
5554 : {
5555 0 : bRet = false;
5556 0 : break;
5557 : }
5558 8911 : const auto *CPL_RESTRICT pSrcArray =
5559 8911 : static_cast<const uint8_t *>(src_array->buffers[i]);
5560 8911 : if ((nOffset % 8) != 0)
5561 : {
5562 : // Make sure last byte is fully initialized
5563 2281 : p[nBytes - 1] = 0;
5564 7359 : for (size_t iRow = 0; iRow < nLength; ++iRow)
5565 : {
5566 5078 : if (TestBit(pSrcArray, nOffset + iRow))
5567 4949 : SetBit(p, iRow);
5568 : else
5569 129 : UnsetBit(p, iRow);
5570 : }
5571 : }
5572 : else
5573 : {
5574 6630 : memcpy(p, pSrcArray + nOffset / 8, nBytes);
5575 : }
5576 8911 : out_array->buffers[i] = p;
5577 : }
5578 : }
5579 13360 : else if (i == 1)
5580 : {
5581 11229 : CPLAssert(src_array->buffers[i]);
5582 11229 : size_t nEltSize = 0;
5583 11229 : size_t nExtraElt = 0;
5584 11229 : if (IsUInt8(format) || IsInt8(format))
5585 742 : nEltSize = sizeof(uint8_t);
5586 10487 : else if (IsUInt16(format) || IsInt16(format) || IsFloat16(format))
5587 762 : nEltSize = sizeof(uint16_t);
5588 19430 : else if (IsUInt32(format) || IsInt32(format) || IsFloat32(format) ||
5589 28056 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
5590 8626 : strcmp(format, "ttm") == 0)
5591 : {
5592 1316 : nEltSize = sizeof(uint32_t);
5593 : }
5594 13011 : else if (IsString(format) || IsBinary(format) || IsList(format) ||
5595 4602 : IsMap(format))
5596 : {
5597 4496 : nEltSize = sizeof(uint32_t);
5598 4496 : nExtraElt = 1;
5599 : }
5600 7455 : else if (IsUInt64(format) || IsInt64(format) || IsFloat64(format) ||
5601 1648 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
5602 7455 : strcmp(format, "ttn") == 0 || IsTimestamp(format))
5603 : {
5604 3085 : nEltSize = sizeof(uint64_t);
5605 : }
5606 1318 : else if (IsLargeString(format) || IsLargeBinary(format) ||
5607 490 : IsLargeList(format))
5608 : {
5609 343 : nEltSize = sizeof(uint64_t);
5610 343 : nExtraElt = 1;
5611 : }
5612 485 : else if (IsFixedWidthBinary(format))
5613 : {
5614 111 : nEltSize = GetFixedWithBinary(format);
5615 : }
5616 374 : else if (IsDecimal(format))
5617 : {
5618 374 : int nPrecision = 0;
5619 374 : int nScale = 0;
5620 374 : int nWidthInBytes = 0;
5621 374 : if (!ParseDecimalFormat(format, nPrecision, nScale,
5622 : nWidthInBytes))
5623 : {
5624 0 : CPLError(
5625 : CE_Failure, CPLE_AppDefined,
5626 : "Unexpected error in OGRCloneArrowArray(): unhandled "
5627 : "field format: %s",
5628 : format);
5629 :
5630 0 : return false;
5631 : }
5632 374 : nEltSize = nWidthInBytes;
5633 : }
5634 11229 : if (nEltSize)
5635 : {
5636 11229 : void *p = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
5637 : nLength ? nEltSize * (nLength + nExtraElt) : 1);
5638 11229 : if (!p)
5639 : {
5640 0 : bRet = false;
5641 0 : break;
5642 : }
5643 11229 : if (nLength)
5644 : {
5645 13022 : if ((IsString(format) || IsBinary(format)) &&
5646 1793 : static_cast<const uint32_t *>(
5647 1793 : src_array->buffers[1])[nOffset] != 0)
5648 : {
5649 258 : const auto *CPL_RESTRICT pSrcOffsets =
5650 258 : static_cast<const uint32_t *>(
5651 258 : src_array->buffers[1]) +
5652 : nOffset;
5653 258 : const auto nShiftOffset = pSrcOffsets[0];
5654 258 : auto *CPL_RESTRICT pDstOffsets =
5655 : static_cast<uint32_t *>(p);
5656 1118 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5657 : {
5658 860 : pDstOffsets[iRow] =
5659 860 : pSrcOffsets[iRow] - nShiftOffset;
5660 : }
5661 : }
5662 11309 : else if ((IsLargeString(format) || IsLargeBinary(format)) &&
5663 338 : static_cast<const uint64_t *>(
5664 338 : src_array->buffers[1])[nOffset] != 0)
5665 : {
5666 86 : const auto *CPL_RESTRICT pSrcOffsets =
5667 86 : static_cast<const uint64_t *>(
5668 86 : src_array->buffers[1]) +
5669 : nOffset;
5670 86 : const auto nShiftOffset = pSrcOffsets[0];
5671 86 : auto *CPL_RESTRICT pDstOffsets =
5672 : static_cast<uint64_t *>(p);
5673 344 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5674 : {
5675 258 : pDstOffsets[iRow] =
5676 258 : pSrcOffsets[iRow] - nShiftOffset;
5677 : }
5678 : }
5679 : else
5680 : {
5681 10885 : memcpy(
5682 : p,
5683 10885 : static_cast<const GByte *>(src_array->buffers[i]) +
5684 10885 : nEltSize * nOffset,
5685 10885 : nEltSize * (nLength + nExtraElt));
5686 : }
5687 : }
5688 11229 : out_array->buffers[i] = p;
5689 : }
5690 : else
5691 : {
5692 0 : CPLError(CE_Failure, CPLE_AppDefined,
5693 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5694 : "format = '%s', i = 1",
5695 0 : schema->name, format);
5696 0 : bRet = false;
5697 0 : break;
5698 : }
5699 : }
5700 2131 : else if (i == 2)
5701 : {
5702 2131 : CPLAssert(src_array->buffers[i]);
5703 2131 : size_t nSrcCharOffset = 0;
5704 2131 : size_t nCharCount = 0;
5705 2131 : if (IsString(format) || IsBinary(format))
5706 : {
5707 1793 : const auto *pSrcOffsets =
5708 1793 : static_cast<const uint32_t *>(src_array->buffers[1]) +
5709 : nOffset;
5710 1793 : nSrcCharOffset = pSrcOffsets[0];
5711 1793 : nCharCount = pSrcOffsets[nLength] - pSrcOffsets[0];
5712 : }
5713 338 : else if (IsLargeString(format) || IsLargeBinary(format))
5714 : {
5715 338 : const auto *pSrcOffsets =
5716 338 : static_cast<const uint64_t *>(src_array->buffers[1]) +
5717 : nOffset;
5718 338 : nSrcCharOffset = static_cast<size_t>(pSrcOffsets[0]);
5719 338 : nCharCount =
5720 338 : static_cast<size_t>(pSrcOffsets[nLength] - pSrcOffsets[0]);
5721 : }
5722 : else
5723 : {
5724 0 : CPLError(CE_Failure, CPLE_AppDefined,
5725 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5726 : "format = '%s', i = 2",
5727 0 : schema->name, format);
5728 0 : bRet = false;
5729 0 : break;
5730 : }
5731 : void *p =
5732 2131 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCharCount ? nCharCount : 1);
5733 2131 : if (!p)
5734 : {
5735 0 : bRet = false;
5736 0 : break;
5737 : }
5738 2131 : if (nCharCount)
5739 : {
5740 2131 : memcpy(p,
5741 2131 : static_cast<const GByte *>(src_array->buffers[i]) +
5742 : nSrcCharOffset,
5743 : nCharCount);
5744 : }
5745 2131 : out_array->buffers[i] = p;
5746 : }
5747 : else
5748 : {
5749 0 : CPLError(CE_Failure, CPLE_AppDefined,
5750 : "OGRCloneArrowArray(): unhandled case, array = %s, format "
5751 : "= '%s', i = 3",
5752 0 : schema->name, format);
5753 0 : bRet = false;
5754 0 : break;
5755 : }
5756 : }
5757 :
5758 14093 : if (bRet)
5759 : {
5760 14093 : out_array->n_children = src_array->n_children;
5761 14093 : out_array->children = static_cast<struct ArrowArray **>(
5762 14093 : CPLCalloc(static_cast<size_t>(src_array->n_children),
5763 : sizeof(struct ArrowArray *)));
5764 27945 : for (int64_t i = 0; i < src_array->n_children; ++i)
5765 : {
5766 27704 : out_array->children[i] = static_cast<struct ArrowArray *>(
5767 13852 : CPLCalloc(1, sizeof(struct ArrowArray)));
5768 40215 : if (!OGRCloneArrowArray(schema->children[i], src_array->children[i],
5769 13852 : out_array->children[i],
5770 13852 : IsFixedSizeList(format)
5771 1341 : ? nOffset * GetFixedSizeList(format)
5772 12511 : : IsStructure(format) ? nOffset
5773 : : 0))
5774 : {
5775 0 : bRet = false;
5776 0 : break;
5777 : }
5778 : }
5779 : }
5780 :
5781 14093 : if (bRet && src_array->dictionary)
5782 : {
5783 111 : out_array->dictionary = static_cast<struct ArrowArray *>(
5784 111 : CPLCalloc(1, sizeof(struct ArrowArray)));
5785 111 : bRet = OGRCloneArrowArray(schema->dictionary, src_array->dictionary,
5786 : out_array->dictionary, 0);
5787 : }
5788 :
5789 14093 : if (!bRet)
5790 : {
5791 0 : out_array->release(out_array);
5792 0 : memset(out_array, 0, sizeof(*out_array));
5793 : }
5794 14093 : return bRet;
5795 : }
5796 :
5797 : /** Full/deep copy of an array.
5798 : *
5799 : * Renormalize the offset of the array (and its children) to 0.
5800 : *
5801 : * In case of failure, out_array will be let in a released state.
5802 : *
5803 : * @param schema Schema of the array. Must *NOT* be NULL.
5804 : * @param src_array Source array. Must *NOT* be NULL.
5805 : * @param out_array Output array. Must *NOT* be NULL (but its content may be random)
5806 : * @return true if success.
5807 : */
5808 130 : bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5809 : const struct ArrowArray *src_array,
5810 : struct ArrowArray *out_array)
5811 : {
5812 130 : return OGRCloneArrowArray(schema, src_array, out_array, 0);
5813 : }
5814 :
5815 : /************************************************************************/
5816 : /* OGRCloneArrowMetadata() */
5817 : /************************************************************************/
5818 :
5819 23 : static void *OGRCloneArrowMetadata(const void *pMetadata)
5820 : {
5821 23 : if (!pMetadata)
5822 19 : return nullptr;
5823 4 : std::vector<GByte> abyOut;
5824 4 : const GByte *pabyMetadata = static_cast<const GByte *>(pMetadata);
5825 : int32_t nKVP;
5826 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + sizeof(int32_t));
5827 4 : memcpy(&nKVP, pabyMetadata, sizeof(int32_t));
5828 4 : pabyMetadata += sizeof(int32_t);
5829 8 : for (int i = 0; i < nKVP; ++i)
5830 : {
5831 : int32_t nSizeKey;
5832 0 : abyOut.insert(abyOut.end(), pabyMetadata,
5833 4 : pabyMetadata + sizeof(int32_t));
5834 4 : memcpy(&nSizeKey, pabyMetadata, sizeof(int32_t));
5835 4 : pabyMetadata += sizeof(int32_t);
5836 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeKey);
5837 4 : pabyMetadata += nSizeKey;
5838 :
5839 : int32_t nSizeValue;
5840 0 : abyOut.insert(abyOut.end(), pabyMetadata,
5841 4 : pabyMetadata + sizeof(int32_t));
5842 4 : memcpy(&nSizeValue, pabyMetadata, sizeof(int32_t));
5843 4 : pabyMetadata += sizeof(int32_t);
5844 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeValue);
5845 4 : pabyMetadata += nSizeValue;
5846 : }
5847 :
5848 4 : GByte *pabyOut = static_cast<GByte *>(VSI_MALLOC_VERBOSE(abyOut.size()));
5849 4 : if (pabyOut)
5850 4 : memcpy(pabyOut, abyOut.data(), abyOut.size());
5851 4 : return pabyOut;
5852 : }
5853 :
5854 : /************************************************************************/
5855 : /* OGRCloneArrowSchema() */
5856 : /************************************************************************/
5857 :
5858 : /** Full/deep copy of a schema.
5859 : *
5860 : * In case of failure, out_schema will be let in a released state.
5861 : *
5862 : * @param schema Schema to clone. Must *NOT* be NULL.
5863 : * @param out_schema Output schema. Must *NOT* be NULL (but its content may be random)
5864 : * @return true if success.
5865 : */
5866 23 : bool OGRCloneArrowSchema(const struct ArrowSchema *schema,
5867 : struct ArrowSchema *out_schema)
5868 : {
5869 23 : memset(out_schema, 0, sizeof(*out_schema));
5870 23 : out_schema->release = OGRLayerFullReleaseSchema;
5871 23 : out_schema->format = CPLStrdup(schema->format);
5872 23 : out_schema->name = CPLStrdup(schema->name);
5873 23 : out_schema->metadata = static_cast<const char *>(
5874 23 : const_cast<const void *>(OGRCloneArrowMetadata(schema->metadata)));
5875 23 : out_schema->flags = schema->flags;
5876 23 : if (schema->n_children)
5877 : {
5878 5 : out_schema->children =
5879 5 : static_cast<struct ArrowSchema **>(VSI_CALLOC_VERBOSE(
5880 : static_cast<int>(schema->n_children), sizeof(ArrowSchema *)));
5881 5 : if (!out_schema->children)
5882 : {
5883 0 : out_schema->release(out_schema);
5884 0 : return false;
5885 : }
5886 5 : out_schema->n_children = schema->n_children;
5887 23 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
5888 : {
5889 36 : out_schema->children[i] = static_cast<struct ArrowSchema *>(
5890 18 : CPLMalloc(sizeof(ArrowSchema)));
5891 18 : if (!OGRCloneArrowSchema(schema->children[i],
5892 18 : out_schema->children[i]))
5893 : {
5894 0 : out_schema->release(out_schema);
5895 0 : return false;
5896 : }
5897 : }
5898 : }
5899 23 : if (schema->dictionary)
5900 : {
5901 0 : out_schema->dictionary =
5902 0 : static_cast<struct ArrowSchema *>(CPLMalloc(sizeof(ArrowSchema)));
5903 0 : if (!OGRCloneArrowSchema(schema->dictionary, out_schema->dictionary))
5904 : {
5905 0 : out_schema->release(out_schema);
5906 0 : return false;
5907 : }
5908 : }
5909 23 : return true;
5910 : }
5911 :
5912 : /************************************************************************/
5913 : /* OGRLayer::IsArrowSchemaSupported() */
5914 : /************************************************************************/
5915 :
5916 : const struct
5917 : {
5918 : const char *arrowType;
5919 : OGRFieldType eType;
5920 : OGRFieldSubType eSubType;
5921 : } gasArrowTypesToOGR[] = {
5922 : {"b", OFTInteger, OFSTBoolean}, {"c", OFTInteger, OFSTInt16}, // Int8
5923 : {"C", OFTInteger, OFSTInt16}, // UInt8
5924 : {"s", OFTInteger, OFSTInt16}, // Int16
5925 : {"S", OFTInteger, OFSTNone}, // UInt16
5926 : {"i", OFTInteger, OFSTNone}, // Int32
5927 : {"I", OFTInteger64, OFSTNone}, // UInt32
5928 : {"l", OFTInteger64, OFSTNone}, // Int64
5929 : {"L", OFTReal, OFSTNone}, // UInt64 (potentially lossy conversion if going through OGRFeature)
5930 : {"e", OFTReal, OFSTFloat32}, // float16
5931 : {"f", OFTReal, OFSTFloat32}, // float32
5932 : {"g", OFTReal, OFSTNone}, // float64
5933 : {"z", OFTBinary, OFSTNone}, // binary
5934 : {"Z", OFTBinary, OFSTNone}, // large binary (will be limited to 32 bit length though if going through OGRFeature!)
5935 : {"u", OFTString, OFSTNone}, // string
5936 : {"U", OFTString, OFSTNone}, // large string
5937 : {"tdD", OFTDate, OFSTNone}, // date32[days]
5938 : {"tdm", OFTDate, OFSTNone}, // date64[milliseconds]
5939 : {"tts", OFTTime, OFSTNone}, // time32 [seconds]
5940 : {"ttm", OFTTime, OFSTNone}, // time32 [milliseconds]
5941 : {"ttu", OFTTime, OFSTNone}, // time64 [microseconds]
5942 : {"ttn", OFTTime, OFSTNone}, // time64 [nanoseconds]
5943 : };
5944 :
5945 : const struct
5946 : {
5947 : const char arrowLetter;
5948 : OGRFieldType eType;
5949 : OGRFieldSubType eSubType;
5950 : } gasListTypes[] = {
5951 : {ARROW_LETTER_BOOLEAN, OFTIntegerList, OFSTBoolean},
5952 : {ARROW_LETTER_INT8, OFTIntegerList, OFSTInt16},
5953 : {ARROW_LETTER_UINT8, OFTIntegerList, OFSTInt16},
5954 : {ARROW_LETTER_INT16, OFTIntegerList, OFSTInt16},
5955 : {ARROW_LETTER_UINT16, OFTIntegerList, OFSTNone},
5956 : {ARROW_LETTER_INT32, OFTIntegerList, OFSTNone},
5957 : {ARROW_LETTER_UINT32, OFTInteger64List, OFSTNone},
5958 : {ARROW_LETTER_INT64, OFTInteger64List, OFSTNone},
5959 : {ARROW_LETTER_UINT64, OFTRealList,
5960 : OFSTNone}, //(potentially lossy conversion if going through OGRFeature)
5961 : {ARROW_LETTER_FLOAT16, OFTRealList, OFSTFloat32},
5962 : {ARROW_LETTER_FLOAT32, OFTRealList, OFSTFloat32},
5963 : {ARROW_LETTER_FLOAT64, OFTRealList, OFSTNone},
5964 : {ARROW_LETTER_STRING, OFTStringList, OFSTNone},
5965 : {ARROW_LETTER_LARGE_STRING, OFTStringList, OFSTNone},
5966 : };
5967 :
5968 43 : static inline bool IsValidDictionaryIndexType(const char *format)
5969 : {
5970 40 : return (format[0] == ARROW_LETTER_INT8 || format[0] == ARROW_LETTER_UINT8 ||
5971 37 : format[0] == ARROW_LETTER_INT16 ||
5972 34 : format[0] == ARROW_LETTER_UINT16 ||
5973 31 : format[0] == ARROW_LETTER_INT32 ||
5974 9 : format[0] == ARROW_LETTER_UINT32 ||
5975 6 : format[0] == ARROW_LETTER_INT64 ||
5976 89 : format[0] == ARROW_LETTER_UINT64) &&
5977 86 : format[1] == 0;
5978 : }
5979 :
5980 230 : static bool IsSupportForJSONObj(const struct ArrowSchema *schema)
5981 : {
5982 230 : const char *format = schema->format;
5983 230 : if (IsStructure(format))
5984 : {
5985 35 : for (int64_t i = 0; i < schema->n_children; ++i)
5986 : {
5987 26 : if (!IsSupportForJSONObj(schema->children[i]))
5988 0 : return false;
5989 : }
5990 9 : return true;
5991 : }
5992 :
5993 2752 : for (const auto &sType : gasListTypes)
5994 : {
5995 2626 : if (format[0] == sType.arrowLetter && format[1] == 0)
5996 : {
5997 95 : return true;
5998 : }
5999 : }
6000 :
6001 126 : if (IsBinary(format) || IsLargeBinary(format) || IsFixedWidthBinary(format))
6002 12 : return true;
6003 :
6004 114 : if (IsDecimal(format))
6005 : {
6006 6 : int nPrecision = 0;
6007 6 : int nScale = 0;
6008 6 : int nWidthInBytes = 0;
6009 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6010 : {
6011 0 : CPLError(CE_Failure, CPLE_AppDefined, "Invalid field format %s",
6012 : format);
6013 0 : return false;
6014 : }
6015 :
6016 6 : return GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision) ==
6017 6 : nullptr;
6018 : }
6019 :
6020 108 : if (IsMap(format))
6021 : {
6022 74 : return IsStructure(schema->children[0]->format) &&
6023 148 : schema->children[0]->n_children == 2 &&
6024 222 : IsString(schema->children[0]->children[0]->format) &&
6025 148 : IsSupportForJSONObj(schema->children[0]->children[1]);
6026 : }
6027 :
6028 34 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6029 : {
6030 34 : return IsSupportForJSONObj(schema->children[0]);
6031 : }
6032 :
6033 0 : return false;
6034 : }
6035 :
6036 544 : static bool IsArrowSchemaSupportedInternal(const struct ArrowSchema *schema,
6037 : const std::string &osFieldPrefix,
6038 : std::string &osErrorMsg)
6039 : {
6040 0 : const auto AppendError = [&osErrorMsg](const std::string &osMsg)
6041 : {
6042 0 : if (!osErrorMsg.empty())
6043 0 : osErrorMsg += " ";
6044 0 : osErrorMsg += osMsg;
6045 544 : };
6046 :
6047 544 : const char *fieldName = schema->name;
6048 544 : const char *format = schema->format;
6049 544 : if (IsStructure(format))
6050 : {
6051 5 : bool bRet = true;
6052 5 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6053 21 : for (int64_t i = 0; i < schema->n_children; ++i)
6054 : {
6055 16 : if (!IsArrowSchemaSupportedInternal(schema->children[i],
6056 : osNewPrefix, osErrorMsg))
6057 0 : bRet = false;
6058 : }
6059 5 : return bRet;
6060 : }
6061 :
6062 539 : if (schema->dictionary)
6063 : {
6064 15 : if (!IsValidDictionaryIndexType(format))
6065 : {
6066 0 : AppendError("Dictionary only supported if the parent is of "
6067 : "type [U]Int[8|16|32|64]");
6068 0 : return false;
6069 : }
6070 :
6071 15 : schema = schema->dictionary;
6072 15 : format = schema->format;
6073 : }
6074 :
6075 539 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6076 : {
6077 : // Only some subtypes supported
6078 132 : const char *childFormat = schema->children[0]->format;
6079 1103 : for (const auto &sType : gasListTypes)
6080 : {
6081 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
6082 : {
6083 117 : return true;
6084 : }
6085 : }
6086 :
6087 15 : if (IsDecimal(childFormat))
6088 : {
6089 7 : int nPrecision = 0;
6090 7 : int nScale = 0;
6091 7 : int nWidthInBytes = 0;
6092 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
6093 : nWidthInBytes))
6094 : {
6095 0 : AppendError(std::string("Invalid field format ") + childFormat +
6096 0 : " for field " + osFieldPrefix + fieldName);
6097 0 : return false;
6098 : }
6099 :
6100 : const char *pszError =
6101 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6102 7 : if (pszError)
6103 : {
6104 0 : AppendError(pszError);
6105 0 : return false;
6106 : }
6107 :
6108 7 : return true;
6109 : }
6110 :
6111 8 : if (IsSupportForJSONObj(schema))
6112 : {
6113 8 : return true;
6114 : }
6115 :
6116 0 : AppendError("Type list for field " + osFieldPrefix + fieldName +
6117 : " is not supported.");
6118 0 : return false;
6119 : }
6120 :
6121 407 : else if (IsMap(format))
6122 : {
6123 70 : if (IsSupportForJSONObj(schema))
6124 70 : return true;
6125 :
6126 0 : AppendError("Type map for field " + osFieldPrefix + fieldName +
6127 : " is not supported.");
6128 0 : return false;
6129 : }
6130 337 : else if (IsDecimal(format))
6131 : {
6132 6 : int nPrecision = 0;
6133 6 : int nScale = 0;
6134 6 : int nWidthInBytes = 0;
6135 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6136 : {
6137 0 : AppendError(std::string("Invalid field format ") + format +
6138 0 : " for field " + osFieldPrefix + fieldName);
6139 0 : return false;
6140 : }
6141 :
6142 : const char *pszError =
6143 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6144 6 : if (pszError)
6145 : {
6146 0 : AppendError(pszError);
6147 0 : return false;
6148 : }
6149 :
6150 6 : return true;
6151 : }
6152 : else
6153 : {
6154 4235 : for (const auto &sType : gasArrowTypesToOGR)
6155 : {
6156 4215 : if (strcmp(format, sType.arrowType) == 0)
6157 : {
6158 311 : return true;
6159 : }
6160 : }
6161 :
6162 20 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
6163 20 : return true;
6164 :
6165 0 : AppendError("Type '" + std::string(format) + "' for field " +
6166 0 : osFieldPrefix + fieldName + " is not supported.");
6167 0 : return false;
6168 : }
6169 : }
6170 :
6171 : /** Returns whether the provided ArrowSchema is supported for writing.
6172 : *
6173 : * This method exists since not all drivers may support all Arrow data types.
6174 : *
6175 : * The ArrowSchema must be of type struct (format=+s)
6176 : *
6177 : * It is recommended to call this method before calling WriteArrowBatch().
6178 : *
6179 : * This is the same as the C function OGR_L_IsArrowSchemaSupported().
6180 : *
6181 : * @param schema Schema of type struct (format = '+s')
6182 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6183 : * @param[out] osErrorMsg Reason of the failure, when this method returns false.
6184 : * @return true if the ArrowSchema is supported for writing.
6185 : * @since 3.8
6186 : */
6187 59 : bool OGRLayer::IsArrowSchemaSupported(const struct ArrowSchema *schema,
6188 : CPL_UNUSED CSLConstList papszOptions,
6189 : std::string &osErrorMsg) const
6190 : {
6191 59 : if (!IsStructure(schema->format))
6192 : {
6193 : osErrorMsg =
6194 : "IsArrowSchemaSupported() should be called on a schema that is a "
6195 1 : "struct of fields";
6196 1 : return false;
6197 : }
6198 :
6199 58 : bool bRet = true;
6200 586 : for (int64_t i = 0; i < schema->n_children; ++i)
6201 : {
6202 528 : if (!IsArrowSchemaSupportedInternal(schema->children[i], std::string(),
6203 : osErrorMsg))
6204 0 : bRet = false;
6205 : }
6206 58 : return bRet;
6207 : }
6208 :
6209 : /************************************************************************/
6210 : /* OGR_L_IsArrowSchemaSupported() */
6211 : /************************************************************************/
6212 :
6213 : /** Returns whether the provided ArrowSchema is supported for writing.
6214 : *
6215 : * This function exists since not all drivers may support all Arrow data types.
6216 : *
6217 : * The ArrowSchema must be of type struct (format=+s)
6218 : *
6219 : * It is recommended to call this function before calling OGR_L_WriteArrowBatch().
6220 : *
6221 : * This is the same as the C++ method OGRLayer::IsArrowSchemaSupported().
6222 : *
6223 : * @param hLayer Layer.
6224 : * @param schema Schema of type struct (format = '+s')
6225 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6226 : * @param[out] ppszErrorMsg nullptr, or pointer to a string that will contain
6227 : * the reason of the failure, when this function returns false.
6228 : * @return true if the ArrowSchema is supported for writing.
6229 : * @since 3.8
6230 : */
6231 19 : bool OGR_L_IsArrowSchemaSupported(OGRLayerH hLayer,
6232 : const struct ArrowSchema *schema,
6233 : char **papszOptions, char **ppszErrorMsg)
6234 : {
6235 19 : VALIDATE_POINTER1(hLayer, __func__, false);
6236 19 : VALIDATE_POINTER1(schema, __func__, false);
6237 :
6238 38 : std::string osErrorMsg;
6239 38 : if (!OGRLayer::FromHandle(hLayer)->IsArrowSchemaSupported(
6240 19 : schema, papszOptions, osErrorMsg))
6241 : {
6242 4 : if (ppszErrorMsg)
6243 4 : *ppszErrorMsg = VSIStrdup(osErrorMsg.c_str());
6244 4 : return false;
6245 : }
6246 : else
6247 : {
6248 15 : if (ppszErrorMsg)
6249 15 : *ppszErrorMsg = nullptr;
6250 15 : return true;
6251 : }
6252 : }
6253 :
6254 : /************************************************************************/
6255 : /* IsKnownCodedFieldDomain() */
6256 : /************************************************************************/
6257 :
6258 34 : static bool IsKnownCodedFieldDomain(OGRLayer *poLayer,
6259 : const char *arrowMetadata)
6260 : {
6261 34 : if (arrowMetadata)
6262 : {
6263 6 : const auto oMetadata = OGRParseArrowMetadata(arrowMetadata);
6264 6 : for (const auto &oIter : oMetadata)
6265 : {
6266 6 : if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6267 : {
6268 6 : auto poDS = poLayer->GetDataset();
6269 6 : if (poDS)
6270 : {
6271 : const auto poFieldDomain =
6272 6 : poDS->GetFieldDomain(oIter.second);
6273 12 : if (poFieldDomain &&
6274 6 : poFieldDomain->GetDomainType() == OFDT_CODED)
6275 : {
6276 6 : return true;
6277 : }
6278 : }
6279 : }
6280 : }
6281 : }
6282 28 : return false;
6283 : }
6284 :
6285 : /************************************************************************/
6286 : /* OGRLayer::CreateFieldFromArrowSchema() */
6287 : /************************************************************************/
6288 :
6289 : //! @cond Doxygen_Suppress
6290 472 : bool OGRLayer::CreateFieldFromArrowSchemaInternal(
6291 : const struct ArrowSchema *schema, const std::string &osFieldPrefix,
6292 : CSLConstList papszOptions)
6293 : {
6294 472 : const char *fieldName = schema->name;
6295 472 : const char *format = schema->format;
6296 472 : if (IsStructure(format))
6297 : {
6298 5 : if (IsArrowTimeStampWithOffsetField(schema))
6299 : {
6300 0 : OGRFieldDefn oFieldDefn((osFieldPrefix + fieldName).c_str(),
6301 0 : OFTDateTime);
6302 0 : oFieldDefn.SetTZFlag(OGR_TZFLAG_MIXED_TZ);
6303 0 : auto poLayerDefn = GetLayerDefn();
6304 0 : const int nFieldCountBefore = poLayerDefn->GetFieldCount();
6305 0 : if (CreateField(&oFieldDefn) != OGRERR_NONE ||
6306 0 : nFieldCountBefore + 1 != poLayerDefn->GetFieldCount())
6307 : {
6308 0 : return false;
6309 : }
6310 : }
6311 : else
6312 : {
6313 5 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6314 21 : for (int64_t i = 0; i < schema->n_children; ++i)
6315 : {
6316 16 : if (!CreateFieldFromArrowSchemaInternal(
6317 16 : schema->children[i], osNewPrefix, papszOptions))
6318 0 : return false;
6319 : }
6320 : }
6321 5 : return true;
6322 : }
6323 :
6324 934 : CPLStringList aosNativeTypes;
6325 467 : auto poLayer = const_cast<OGRLayer *>(this);
6326 467 : auto poDS = poLayer->GetDataset();
6327 467 : if (poDS)
6328 : {
6329 467 : auto poDriver = poDS->GetDriver();
6330 467 : if (poDriver)
6331 : {
6332 : const char *pszMetadataItem =
6333 467 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
6334 467 : if (pszMetadataItem)
6335 467 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
6336 : }
6337 : }
6338 :
6339 482 : if (schema->dictionary &&
6340 15 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6341 : {
6342 13 : if (!IsValidDictionaryIndexType(format))
6343 : {
6344 0 : CPLError(CE_Failure, CPLE_NotSupported,
6345 : "Dictionary only supported if the parent is of "
6346 : "type [U]Int[8|16|32|64]");
6347 0 : return false;
6348 : }
6349 :
6350 13 : schema = schema->dictionary;
6351 13 : format = schema->format;
6352 : }
6353 :
6354 467 : const auto AddField = [this, schema, fieldName, &aosNativeTypes,
6355 : &osFieldPrefix, poDS](OGRFieldType eTypeIn,
6356 : OGRFieldSubType eSubTypeIn,
6357 3305 : int nWidth, int nPrecision)
6358 : {
6359 467 : const char *pszTypeName = OGRFieldDefn::GetFieldTypeName(eTypeIn);
6360 467 : auto eTypeOut = eTypeIn;
6361 467 : auto eSubTypeOut = eSubTypeIn;
6362 934 : if (!aosNativeTypes.empty() &&
6363 467 : aosNativeTypes.FindString(pszTypeName) < 0)
6364 : {
6365 20 : eTypeOut = OFTString;
6366 20 : eSubTypeOut =
6367 15 : (eTypeIn == OFTIntegerList || eTypeIn == OFTInteger64List ||
6368 8 : eTypeIn == OFTRealList || eTypeIn == OFTStringList)
6369 35 : ? OFSTJSON
6370 : : OFSTNone;
6371 : }
6372 :
6373 934 : const std::string osWantedOGRFieldName = osFieldPrefix + fieldName;
6374 934 : OGRFieldDefn oFieldDefn(osWantedOGRFieldName.c_str(), eTypeOut);
6375 467 : oFieldDefn.SetSubType(eSubTypeOut);
6376 467 : if (eTypeOut == eTypeIn && eSubTypeOut == eSubTypeIn)
6377 : {
6378 447 : oFieldDefn.SetWidth(nWidth);
6379 447 : oFieldDefn.SetPrecision(nPrecision);
6380 : }
6381 467 : oFieldDefn.SetNullable((schema->flags & ARROW_FLAG_NULLABLE) != 0);
6382 :
6383 467 : if (schema->metadata)
6384 : {
6385 62 : const auto oMetadata = OGRParseArrowMetadata(schema->metadata);
6386 63 : for (const auto &oIter : oMetadata)
6387 : {
6388 32 : if (oIter.first == MD_GDAL_OGR_TYPE)
6389 : {
6390 4 : const auto &osType = oIter.second;
6391 48 : for (auto eType = OFTInteger; eType <= OFTMaxType;)
6392 : {
6393 48 : if (OGRFieldDefn::GetFieldTypeName(eType) == osType)
6394 : {
6395 4 : oFieldDefn.SetType(eType);
6396 4 : break;
6397 : }
6398 44 : if (eType == OFTMaxType)
6399 0 : break;
6400 : else
6401 44 : eType = static_cast<OGRFieldType>(eType + 1);
6402 : }
6403 : }
6404 28 : else if (oIter.first == MD_GDAL_OGR_ALTERNATIVE_NAME)
6405 2 : oFieldDefn.SetAlternativeName(oIter.second.c_str());
6406 26 : else if (oIter.first == MD_GDAL_OGR_COMMENT)
6407 2 : oFieldDefn.SetComment(oIter.second);
6408 24 : else if (oIter.first == MD_GDAL_OGR_DEFAULT)
6409 2 : oFieldDefn.SetDefault(oIter.second.c_str());
6410 22 : else if (oIter.first == MD_GDAL_OGR_SUBTYPE)
6411 : {
6412 5 : if (eTypeIn == eTypeOut)
6413 : {
6414 4 : const auto &osSubType = oIter.second;
6415 4 : for (auto eSubType = OFSTNone;
6416 15 : eSubType <= OFSTMaxSubType;)
6417 : {
6418 15 : if (OGRFieldDefn::GetFieldSubTypeName(eSubType) ==
6419 : osSubType)
6420 : {
6421 4 : oFieldDefn.SetSubType(eSubType);
6422 4 : break;
6423 : }
6424 11 : if (eSubType == OFSTMaxSubType)
6425 0 : break;
6426 : else
6427 11 : eSubType =
6428 11 : static_cast<OGRFieldSubType>(eSubType + 1);
6429 : }
6430 : }
6431 : }
6432 17 : else if (oIter.first == MD_GDAL_OGR_WIDTH)
6433 6 : oFieldDefn.SetWidth(atoi(oIter.second.c_str()));
6434 11 : else if (oIter.first == MD_GDAL_OGR_UNIQUE)
6435 2 : oFieldDefn.SetUnique(oIter.second == "true");
6436 9 : else if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6437 : {
6438 2 : if (poDS && poDS->GetFieldDomain(oIter.second))
6439 2 : oFieldDefn.SetDomainName(oIter.second);
6440 : }
6441 13 : else if (oIter.first == ARROW_EXTENSION_NAME_KEY &&
6442 6 : (oIter.second == EXTENSION_NAME_ARROW_JSON ||
6443 : // Used by BigQuery through ADBC driver
6444 0 : oIter.second == "google:sqlType:json"))
6445 : {
6446 6 : oFieldDefn.SetSubType(OFSTJSON);
6447 : }
6448 1 : else if (oIter.first == ARROW_EXTENSION_NAME_KEY)
6449 : {
6450 0 : CPLDebug("OGR", "Unknown Arrow extension: %s",
6451 : oIter.second.c_str());
6452 : }
6453 : else
6454 : {
6455 1 : CPLDebug("OGR", "Unknown field metadata: %s",
6456 : oIter.first.c_str());
6457 : }
6458 : }
6459 : }
6460 467 : auto poLayerDefn = GetLayerDefn();
6461 467 : const int nFieldCountBefore = poLayerDefn->GetFieldCount();
6462 934 : if (CreateField(&oFieldDefn) != OGRERR_NONE ||
6463 467 : nFieldCountBefore + 1 != poLayerDefn->GetFieldCount())
6464 : {
6465 0 : return false;
6466 : }
6467 : const char *pszActualFieldName =
6468 467 : poLayerDefn->GetFieldDefn(nFieldCountBefore)->GetNameRef();
6469 467 : if (pszActualFieldName != osWantedOGRFieldName)
6470 : {
6471 : m_poPrivate
6472 1 : ->m_oMapArrowFieldNameToOGRFieldName[osWantedOGRFieldName] =
6473 1 : pszActualFieldName;
6474 : }
6475 467 : return true;
6476 467 : };
6477 :
6478 8184 : for (const auto &sType : gasArrowTypesToOGR)
6479 : {
6480 7953 : if (strcmp(format, sType.arrowType) == 0)
6481 : {
6482 236 : return AddField(sType.eType, sType.eSubType, 0, 0);
6483 : }
6484 : }
6485 :
6486 231 : if (IsMap(format))
6487 : {
6488 70 : return AddField(OFTString, OFSTJSON, 0, 0);
6489 : }
6490 :
6491 161 : if (IsTimestamp(format))
6492 : {
6493 20 : return AddField(OFTDateTime, OFSTNone, 0, 0);
6494 : }
6495 :
6496 141 : if (IsFixedWidthBinary(format))
6497 : {
6498 3 : return AddField(OFTBinary, OFSTNone, GetFixedWithBinary(format), 0);
6499 : }
6500 :
6501 138 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6502 : {
6503 132 : const char *childFormat = schema->children[0]->format;
6504 1103 : for (const auto &sType : gasListTypes)
6505 : {
6506 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
6507 : {
6508 117 : return AddField(sType.eType, sType.eSubType, 0, 0);
6509 : }
6510 : }
6511 :
6512 15 : if (IsDecimal(childFormat))
6513 : {
6514 7 : int nPrecision = 0;
6515 7 : int nScale = 0;
6516 7 : int nWidthInBytes = 0;
6517 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
6518 : nWidthInBytes))
6519 : {
6520 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6521 0 : (std::string("Invalid field format ") + format +
6522 0 : " for field " + osFieldPrefix + fieldName)
6523 : .c_str());
6524 0 : return false;
6525 : }
6526 :
6527 : const char *pszError =
6528 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6529 7 : if (pszError)
6530 : {
6531 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6532 0 : return false;
6533 : }
6534 :
6535 : // DBF convention: add space for negative sign and decimal separator
6536 7 : return AddField(OFTRealList, OFSTNone, nPrecision + 2, nScale);
6537 : }
6538 :
6539 8 : if (IsSupportForJSONObj(schema->children[0]))
6540 : {
6541 8 : return AddField(OFTString, OFSTJSON, 0, 0);
6542 : }
6543 :
6544 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6545 0 : ("List of type '" + std::string(childFormat) + "' for field " +
6546 0 : osFieldPrefix + fieldName + " is not supported.")
6547 : .c_str());
6548 0 : return false;
6549 : }
6550 :
6551 6 : if (IsDecimal(format))
6552 : {
6553 6 : int nPrecision = 0;
6554 6 : int nScale = 0;
6555 6 : int nWidthInBytes = 0;
6556 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6557 : {
6558 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6559 0 : (std::string("Invalid field format ") + format +
6560 0 : " for field " + osFieldPrefix + fieldName)
6561 : .c_str());
6562 0 : return false;
6563 : }
6564 :
6565 : const char *pszError =
6566 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6567 6 : if (pszError)
6568 : {
6569 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6570 0 : return false;
6571 : }
6572 :
6573 : // DBF convention: add space for negative sign and decimal separator
6574 6 : return AddField(OFTReal, OFSTNone, nPrecision + 2, nScale);
6575 : }
6576 :
6577 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6578 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
6579 0 : fieldName + " is not supported.")
6580 : .c_str());
6581 0 : return false;
6582 : }
6583 :
6584 : //! @endcond
6585 :
6586 : /** Creates a field from an ArrowSchema.
6587 : *
6588 : * This should only be used for attribute fields. Geometry fields should
6589 : * be created with CreateGeomField(). The FID field should also not be
6590 : * passed with this method.
6591 : *
6592 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6593 : * passed schema must be for an individual field, and thus, is *not* of type
6594 : * struct (format=+s) (unless writing a set of fields grouped together in the
6595 : * same structure).
6596 : *
6597 : * Additional field metadata can be specified through the ArrowSchema::metadata
6598 : * field with the potential following items:
6599 : * <ul>
6600 : * <li>"GDAL:OGR:alternative_name": value of
6601 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6602 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6603 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6604 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6605 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6606 : * string)</li>
6607 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6608 : * "true" or "false")</li>
6609 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6610 : * </ul>
6611 : *
6612 : * This method and CreateField() are mutually exclusive in the same session.
6613 : *
6614 : * This method is the same as the C function OGR_L_CreateFieldFromArrowSchema().
6615 : *
6616 : * @param schema Schema of the field to create.
6617 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6618 : * @return true in case of success
6619 : * @since 3.8
6620 : */
6621 456 : bool OGRLayer::CreateFieldFromArrowSchema(const struct ArrowSchema *schema,
6622 : CSLConstList papszOptions)
6623 : {
6624 912 : return CreateFieldFromArrowSchemaInternal(schema, std::string(),
6625 912 : papszOptions);
6626 : }
6627 :
6628 : /************************************************************************/
6629 : /* OGR_L_CreateFieldFromArrowSchema() */
6630 : /************************************************************************/
6631 :
6632 : /** Creates a field from an ArrowSchema.
6633 : *
6634 : * This should only be used for attribute fields. Geometry fields should
6635 : * be created with CreateGeomField(). The FID field should also not be
6636 : * passed with this method.
6637 : *
6638 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6639 : * passed schema must be for an individual field, and thus, is *not* of type
6640 : * struct (format=+s) (unless writing a set of fields grouped together in the
6641 : * same structure).
6642 : *
6643 : * Additional field metadata can be specified through the ArrowSchema::metadata
6644 : * field with the potential following items:
6645 : * <ul>
6646 : * <li>"GDAL:OGR:alternative_name": value of
6647 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6648 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6649 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6650 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6651 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6652 : * string)</li>
6653 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6654 : * "true" or "false")</li>
6655 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6656 : * </ul>
6657 : *
6658 : * This method and CreateField() are mutually exclusive in the same session.
6659 : *
6660 : * This method is the same as the C++ method OGRLayer::CreateFieldFromArrowSchema().
6661 : *
6662 : * @param hLayer Layer.
6663 : * @param schema Schema of the field to create.
6664 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6665 : * @return true in case of success
6666 : * @since 3.8
6667 : */
6668 541 : bool OGR_L_CreateFieldFromArrowSchema(OGRLayerH hLayer,
6669 : const struct ArrowSchema *schema,
6670 : char **papszOptions)
6671 : {
6672 541 : VALIDATE_POINTER1(hLayer, __func__, false);
6673 541 : VALIDATE_POINTER1(schema, __func__, false);
6674 :
6675 1082 : return OGRLayer::FromHandle(hLayer)->CreateFieldFromArrowSchema(
6676 541 : schema, papszOptions);
6677 : }
6678 :
6679 : /************************************************************************/
6680 : /* BuildOGRFieldInfo() */
6681 : /************************************************************************/
6682 :
6683 : constexpr int FID_COLUMN_SPECIAL_OGR_FIELD_IDX = -2;
6684 :
6685 : struct FieldInfo
6686 : {
6687 : std::string osName{};
6688 : int iOGRFieldIdx = -1;
6689 : const char *format = nullptr;
6690 : OGRFieldType eNominalFieldType =
6691 : OFTMaxType; // OGR data type that would best match the Arrow type
6692 : OGRFieldType eTargetFieldType =
6693 : OFTMaxType; // actual OGR data type of the layer field
6694 : // OGR data type of the feature passed to FillFeature()
6695 : OGRFieldType eSetFeatureFieldType = OFTMaxType;
6696 : bool bIsGeomCol = false;
6697 : bool bUseDictionary = false;
6698 : bool bUseStringOptim = false;
6699 : int nWidthInBytes = 0; // only used for decimal fields
6700 : int nPrecision = 0; // only used for decimal fields
6701 : int nScale = 0; // only used for decimal fields
6702 : };
6703 :
6704 779 : static bool BuildOGRFieldInfo(
6705 : const struct ArrowSchema *schema, struct ArrowArray *array,
6706 : const OGRFeatureDefn *poFeatureDefn, const std::string &osFieldPrefix,
6707 : const CPLStringList &aosNativeTypes, bool &bFallbackTypesUsed,
6708 : std::vector<FieldInfo> &asFieldInfo, const char *pszFIDName,
6709 : const char *pszGeomFieldName, OGRLayer *poLayer,
6710 : const std::map<std::string, std::string> &oMapArrowFieldNameToOGRFieldName,
6711 : const struct ArrowSchema *&schemaFIDColumn,
6712 : struct ArrowArray *&arrayFIDColumn)
6713 : {
6714 779 : const char *fieldName = schema->name;
6715 779 : const char *format = schema->format;
6716 779 : if (IsStructure(format))
6717 : {
6718 9 : if (IsArrowTimeStampWithOffsetField(schema))
6719 : {
6720 0 : FieldInfo sInfo;
6721 0 : sInfo.osName = fieldName;
6722 0 : sInfo.format = "+s";
6723 0 : sInfo.eNominalFieldType = OFTDateTime;
6724 : const std::string &osExpectedOGRFieldName =
6725 0 : [&oMapArrowFieldNameToOGRFieldName,
6726 0 : &sInfo]() -> const std::string &
6727 : {
6728 : const auto oIter =
6729 0 : oMapArrowFieldNameToOGRFieldName.find(sInfo.osName);
6730 0 : if (oIter != oMapArrowFieldNameToOGRFieldName.end())
6731 0 : return oIter->second;
6732 0 : return sInfo.osName;
6733 0 : }();
6734 0 : sInfo.iOGRFieldIdx =
6735 0 : poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str());
6736 0 : if (sInfo.iOGRFieldIdx >= 0)
6737 : {
6738 : const auto eOGRType =
6739 0 : poFeatureDefn->GetFieldDefn(sInfo.iOGRFieldIdx)->GetType();
6740 0 : sInfo.eTargetFieldType = eOGRType;
6741 : }
6742 0 : asFieldInfo.emplace_back(std::move(sInfo));
6743 : }
6744 : else
6745 : {
6746 9 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6747 39 : for (int64_t i = 0; i < array->n_children; ++i)
6748 : {
6749 30 : if (!BuildOGRFieldInfo(
6750 30 : schema->children[i], array->children[i], poFeatureDefn,
6751 : osNewPrefix, aosNativeTypes, bFallbackTypesUsed,
6752 : asFieldInfo, pszFIDName, pszGeomFieldName, poLayer,
6753 : oMapArrowFieldNameToOGRFieldName, schemaFIDColumn,
6754 : arrayFIDColumn))
6755 : {
6756 0 : return false;
6757 : }
6758 : }
6759 : }
6760 9 : return true;
6761 : }
6762 :
6763 1540 : FieldInfo sInfo;
6764 :
6765 789 : if (schema->dictionary &&
6766 19 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6767 : {
6768 15 : if (!IsValidDictionaryIndexType(format))
6769 : {
6770 0 : CPLError(CE_Failure, CPLE_NotSupported,
6771 : "Dictionary only supported if the parent is of "
6772 : "type [U]Int[8|16|32|64]");
6773 0 : return false;
6774 : }
6775 :
6776 15 : sInfo.bUseDictionary = true;
6777 15 : schema = schema->dictionary;
6778 15 : format = schema->format;
6779 15 : array = array->dictionary;
6780 : }
6781 :
6782 770 : sInfo.osName = osFieldPrefix + fieldName;
6783 770 : sInfo.format = format;
6784 770 : if (pszFIDName && sInfo.osName == pszFIDName)
6785 : {
6786 35 : if (IsInt32(format) || IsInt64(format))
6787 : {
6788 34 : sInfo.iOGRFieldIdx = FID_COLUMN_SPECIAL_OGR_FIELD_IDX;
6789 34 : schemaFIDColumn = schema;
6790 34 : arrayFIDColumn = array;
6791 : }
6792 : else
6793 : {
6794 1 : CPLError(CE_Failure, CPLE_AppDefined,
6795 : "FID column '%s' should be of Arrow format 'i' "
6796 : "(int32) or 'l' (int64)",
6797 : sInfo.osName.c_str());
6798 1 : return false;
6799 : }
6800 : }
6801 : else
6802 : {
6803 : const std::string &osExpectedOGRFieldName =
6804 2204 : [&oMapArrowFieldNameToOGRFieldName, &sInfo]() -> const std::string &
6805 : {
6806 : const auto oIter =
6807 735 : oMapArrowFieldNameToOGRFieldName.find(sInfo.osName);
6808 735 : if (oIter != oMapArrowFieldNameToOGRFieldName.end())
6809 1 : return oIter->second;
6810 734 : return sInfo.osName;
6811 735 : }();
6812 735 : sInfo.iOGRFieldIdx =
6813 735 : poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str());
6814 735 : if (sInfo.iOGRFieldIdx >= 0)
6815 : {
6816 655 : bool bTypeOK = false;
6817 : const auto eOGRType =
6818 655 : poFeatureDefn->GetFieldDefn(sInfo.iOGRFieldIdx)->GetType();
6819 655 : sInfo.eTargetFieldType = eOGRType;
6820 11706 : for (const auto &sType : gasArrowTypesToOGR)
6821 : {
6822 11363 : if (strcmp(format, sType.arrowType) == 0)
6823 : {
6824 312 : sInfo.bUseStringOptim = sType.eType == OFTString;
6825 312 : sInfo.eNominalFieldType = sType.eType;
6826 312 : if (eOGRType == sInfo.eNominalFieldType)
6827 : {
6828 281 : bTypeOK = true;
6829 281 : break;
6830 : }
6831 31 : else if (eOGRType == OFTString)
6832 : {
6833 4 : bFallbackTypesUsed = true;
6834 4 : bTypeOK = true;
6835 4 : break;
6836 : }
6837 27 : else if (eOGRType == OFTInteger &&
6838 10 : sType.eType == OFTInteger64)
6839 : {
6840 : // Potentially lossy.
6841 4 : CPLDebug("OGR",
6842 : "For field %s, writing from Arrow array of "
6843 : "type Int64 into OGR Int32 field. "
6844 : "Potentially loss conversion can happen",
6845 : sInfo.osName.c_str());
6846 4 : bFallbackTypesUsed = true;
6847 4 : bTypeOK = true;
6848 4 : break;
6849 : }
6850 23 : else if (eOGRType == OFTInteger && sType.eType == OFTReal)
6851 : {
6852 : // Potentially lossy.
6853 6 : CPLDebug("OGR",
6854 : "For field %s, writing from Arrow array of "
6855 : "type Real into OGR Int32 field. "
6856 : "Potentially loss conversion can happen",
6857 : sInfo.osName.c_str());
6858 6 : bFallbackTypesUsed = true;
6859 6 : bTypeOK = true;
6860 6 : break;
6861 : }
6862 17 : else if (eOGRType == OFTInteger64 && sType.eType == OFTReal)
6863 : {
6864 : // Potentially lossy.
6865 6 : CPLDebug("OGR",
6866 : "For field %s, writing from Arrow array of "
6867 : "type Real into OGR Int64 field. "
6868 : "Potentially loss conversion can happen",
6869 : sInfo.osName.c_str());
6870 6 : bFallbackTypesUsed = true;
6871 6 : bTypeOK = true;
6872 6 : break;
6873 : }
6874 11 : else if (eOGRType == OFTReal && sType.eType == OFTInteger64)
6875 : {
6876 : // Potentially lossy.
6877 4 : CPLDebug("OGR",
6878 : "For field %s, writing from Arrow array of "
6879 : "type Int64 into OGR Real field. "
6880 : "Potentially loss conversion can happen",
6881 : sInfo.osName.c_str());
6882 4 : bFallbackTypesUsed = true;
6883 4 : bTypeOK = true;
6884 4 : break;
6885 : }
6886 7 : else if ((eOGRType == OFTInteger64 ||
6887 4 : eOGRType == OFTReal) &&
6888 4 : sType.eType == OFTInteger)
6889 : {
6890 : // Non-lossy
6891 4 : bFallbackTypesUsed = true;
6892 4 : bTypeOK = true;
6893 4 : break;
6894 : }
6895 3 : else if (eOGRType == OFTDateTime &&
6896 3 : sType.eType == OFTString)
6897 : {
6898 3 : bFallbackTypesUsed = true;
6899 3 : bTypeOK = true;
6900 3 : break;
6901 : }
6902 : else
6903 : {
6904 0 : CPLError(CE_Failure, CPLE_AppDefined,
6905 : "For field %s, OGR field type is %s whereas "
6906 : "Arrow type implies %s",
6907 : sInfo.osName.c_str(),
6908 : OGR_GetFieldTypeName(eOGRType),
6909 0 : OGR_GetFieldTypeName(sType.eType));
6910 0 : return false;
6911 : }
6912 : }
6913 : }
6914 :
6915 655 : if (!bTypeOK && IsMap(format))
6916 : {
6917 106 : sInfo.eNominalFieldType = OFTString;
6918 106 : if (eOGRType == sInfo.eNominalFieldType)
6919 : {
6920 106 : bTypeOK = true;
6921 : }
6922 : else
6923 : {
6924 0 : CPLError(CE_Failure, CPLE_AppDefined,
6925 : "For field %s, OGR field type is %s whereas "
6926 : "Arrow type implies %s",
6927 : sInfo.osName.c_str(),
6928 : OGR_GetFieldTypeName(eOGRType),
6929 : OGR_GetFieldTypeName(OFTString));
6930 0 : return false;
6931 : }
6932 : }
6933 :
6934 655 : if (!bTypeOK && IsTimestamp(format))
6935 : {
6936 32 : sInfo.eNominalFieldType = OFTDateTime;
6937 32 : if (eOGRType == sInfo.eNominalFieldType)
6938 : {
6939 31 : bTypeOK = true;
6940 : }
6941 1 : else if (eOGRType == OFTString)
6942 : {
6943 1 : bFallbackTypesUsed = true;
6944 1 : bTypeOK = true;
6945 : }
6946 : else
6947 : {
6948 0 : CPLError(CE_Failure, CPLE_AppDefined,
6949 : "For field %s, OGR field type is %s whereas "
6950 : "Arrow type implies %s",
6951 : sInfo.osName.c_str(),
6952 : OGR_GetFieldTypeName(eOGRType),
6953 : OGR_GetFieldTypeName(OFTDateTime));
6954 0 : return false;
6955 : }
6956 : }
6957 :
6958 655 : if (!bTypeOK && IsFixedWidthBinary(format))
6959 : {
6960 5 : sInfo.eNominalFieldType = OFTBinary;
6961 5 : if (eOGRType == sInfo.eNominalFieldType)
6962 : {
6963 5 : bTypeOK = true;
6964 : }
6965 0 : else if (eOGRType == OFTString)
6966 : {
6967 0 : bFallbackTypesUsed = true;
6968 0 : bTypeOK = true;
6969 : }
6970 : else
6971 : {
6972 0 : CPLError(CE_Failure, CPLE_AppDefined,
6973 : "For field %s, OGR field type is %s whereas "
6974 : "Arrow type implies %s",
6975 : sInfo.osName.c_str(),
6976 : OGR_GetFieldTypeName(eOGRType),
6977 : OGR_GetFieldTypeName(OFTBinary));
6978 0 : return false;
6979 : }
6980 : }
6981 :
6982 728 : if (!bTypeOK && (IsList(format) || IsLargeList(format) ||
6983 73 : IsFixedSizeList(format)))
6984 : {
6985 190 : const char *childFormat = schema->children[0]->format;
6986 1565 : for (const auto &sType : gasListTypes)
6987 : {
6988 1544 : if (childFormat[0] == sType.arrowLetter &&
6989 169 : childFormat[1] == 0)
6990 : {
6991 169 : sInfo.eNominalFieldType = sType.eType;
6992 169 : if (eOGRType == sInfo.eNominalFieldType)
6993 : {
6994 154 : bTypeOK = true;
6995 154 : break;
6996 : }
6997 15 : else if (eOGRType == OFTString)
6998 : {
6999 15 : bFallbackTypesUsed = true;
7000 15 : bTypeOK = true;
7001 15 : break;
7002 : }
7003 : else
7004 : {
7005 0 : CPLError(CE_Failure, CPLE_AppDefined,
7006 : "For field %s, OGR field type is %s "
7007 : "whereas "
7008 : "Arrow type implies %s",
7009 : sInfo.osName.c_str(),
7010 : OGR_GetFieldTypeName(eOGRType),
7011 0 : OGR_GetFieldTypeName(sType.eType));
7012 0 : return false;
7013 : }
7014 : }
7015 : }
7016 :
7017 190 : if (!bTypeOK && IsDecimal(childFormat))
7018 : {
7019 11 : if (!ParseDecimalFormat(childFormat, sInfo.nPrecision,
7020 : sInfo.nScale, sInfo.nWidthInBytes))
7021 : {
7022 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
7023 0 : (std::string("Invalid field format ") +
7024 0 : childFormat + " for field " + osFieldPrefix +
7025 : fieldName)
7026 : .c_str());
7027 0 : return false;
7028 : }
7029 :
7030 11 : const char *pszError = GetErrorIfUnsupportedDecimal(
7031 : sInfo.nWidthInBytes, sInfo.nPrecision);
7032 11 : if (pszError)
7033 : {
7034 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
7035 0 : return false;
7036 : }
7037 :
7038 11 : sInfo.eNominalFieldType = OFTRealList;
7039 11 : if (eOGRType == sInfo.eNominalFieldType)
7040 : {
7041 11 : bTypeOK = true;
7042 : }
7043 0 : else if (eOGRType == OFTString)
7044 : {
7045 0 : bFallbackTypesUsed = true;
7046 0 : bTypeOK = true;
7047 : }
7048 : else
7049 : {
7050 0 : CPLError(CE_Failure, CPLE_AppDefined,
7051 : "For field %s, OGR field type is %s whereas "
7052 : "Arrow type implies %s",
7053 : sInfo.osName.c_str(),
7054 : OGR_GetFieldTypeName(eOGRType),
7055 : OGR_GetFieldTypeName(OFTRealList));
7056 0 : return false;
7057 : }
7058 : }
7059 :
7060 190 : if (!bTypeOK && IsSupportForJSONObj(schema->children[0]))
7061 : {
7062 10 : sInfo.eNominalFieldType = OFTString;
7063 10 : if (eOGRType == sInfo.eNominalFieldType)
7064 : {
7065 10 : bTypeOK = true;
7066 : }
7067 : else
7068 : {
7069 0 : CPLError(CE_Failure, CPLE_AppDefined,
7070 : "For field %s, OGR field type is %s whereas "
7071 : "Arrow type implies %s",
7072 : sInfo.osName.c_str(),
7073 : OGR_GetFieldTypeName(eOGRType),
7074 : OGR_GetFieldTypeName(OFTString));
7075 0 : return false;
7076 : }
7077 : }
7078 :
7079 190 : if (!bTypeOK)
7080 : {
7081 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
7082 0 : ("List of type '" + std::string(childFormat) +
7083 0 : "' for field " + osFieldPrefix + fieldName +
7084 : " is not supported.")
7085 : .c_str());
7086 0 : return false;
7087 : }
7088 : }
7089 :
7090 655 : if (!bTypeOK && IsDecimal(format))
7091 : {
7092 10 : if (!ParseDecimalFormat(format, sInfo.nPrecision, sInfo.nScale,
7093 : sInfo.nWidthInBytes))
7094 : {
7095 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
7096 0 : (std::string("Invalid field format ") + format +
7097 0 : " for field " + osFieldPrefix + fieldName)
7098 : .c_str());
7099 0 : return false;
7100 : }
7101 :
7102 10 : const char *pszError = GetErrorIfUnsupportedDecimal(
7103 : sInfo.nWidthInBytes, sInfo.nPrecision);
7104 10 : if (pszError)
7105 : {
7106 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
7107 0 : return false;
7108 : }
7109 :
7110 10 : sInfo.eNominalFieldType = OFTReal;
7111 10 : if (eOGRType == sInfo.eNominalFieldType)
7112 : {
7113 10 : bTypeOK = true;
7114 : }
7115 0 : else if (eOGRType == OFTString)
7116 : {
7117 0 : bFallbackTypesUsed = true;
7118 0 : bTypeOK = true;
7119 : }
7120 : else
7121 : {
7122 0 : CPLError(CE_Failure, CPLE_AppDefined,
7123 : "For field %s, OGR field type is %s whereas "
7124 : "Arrow type implies %s",
7125 : sInfo.osName.c_str(),
7126 : OGR_GetFieldTypeName(eOGRType),
7127 : OGR_GetFieldTypeName(OFTReal));
7128 0 : return false;
7129 : }
7130 : }
7131 :
7132 655 : if (!bTypeOK)
7133 : {
7134 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
7135 0 : ("Type '" + std::string(format) + "' for field " +
7136 0 : osFieldPrefix + fieldName + " is not supported.")
7137 : .c_str());
7138 0 : return false;
7139 : }
7140 : }
7141 : else
7142 : {
7143 80 : sInfo.iOGRFieldIdx = poFeatureDefn->GetGeomFieldIndex(
7144 80 : osExpectedOGRFieldName.c_str());
7145 80 : if (sInfo.iOGRFieldIdx < 0)
7146 : {
7147 52 : if (pszGeomFieldName && pszGeomFieldName == sInfo.osName)
7148 : {
7149 47 : if (poFeatureDefn->GetGeomFieldCount() == 0)
7150 : {
7151 0 : CPLError(CE_Failure, CPLE_AppDefined,
7152 : "Cannot find OGR geometry field for Arrow "
7153 : "array %s",
7154 : sInfo.osName.c_str());
7155 0 : return false;
7156 : }
7157 47 : sInfo.iOGRFieldIdx = 0;
7158 : }
7159 : else
7160 : {
7161 : // Check if ARROW:extension:name = ogc.wkb or geoarrow.wkb
7162 5 : const char *pabyMetadata = schema->metadata;
7163 5 : if (pabyMetadata)
7164 : {
7165 : const auto oMetadata =
7166 5 : OGRParseArrowMetadata(pabyMetadata);
7167 5 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
7168 10 : if (oIter != oMetadata.end() &&
7169 5 : (oIter->second == EXTENSION_NAME_OGC_WKB ||
7170 0 : oIter->second == EXTENSION_NAME_GEOARROW_WKB))
7171 : {
7172 5 : if (poFeatureDefn->GetGeomFieldCount() == 0)
7173 : {
7174 0 : CPLError(CE_Failure, CPLE_AppDefined,
7175 : "Cannot find OGR geometry field "
7176 : "for Arrow array %s",
7177 : sInfo.osName.c_str());
7178 0 : return false;
7179 : }
7180 5 : sInfo.iOGRFieldIdx = 0;
7181 : }
7182 : }
7183 : }
7184 :
7185 52 : if (sInfo.iOGRFieldIdx < 0)
7186 : {
7187 0 : CPLError(CE_Failure, CPLE_AppDefined,
7188 : "Cannot find OGR field for Arrow array %s",
7189 : sInfo.osName.c_str());
7190 0 : return false;
7191 : }
7192 : }
7193 :
7194 80 : if (!IsBinary(format) && !IsLargeBinary(format))
7195 : {
7196 0 : CPLError(CE_Failure, CPLE_AppDefined,
7197 : "Geometry column '%s' should be of Arrow format "
7198 : "'z' (binary) or 'Z' (large binary)",
7199 : sInfo.osName.c_str());
7200 0 : return false;
7201 : }
7202 80 : sInfo.bIsGeomCol = true;
7203 : }
7204 : }
7205 :
7206 769 : asFieldInfo.emplace_back(std::move(sInfo));
7207 769 : return true;
7208 : }
7209 :
7210 : /************************************************************************/
7211 : /* GetUInt64Value() */
7212 : /************************************************************************/
7213 :
7214 90 : static inline uint64_t GetUInt64Value(const struct ArrowSchema *schema,
7215 : const struct ArrowArray *array,
7216 : size_t iFeature)
7217 : {
7218 90 : uint64_t nVal = 0;
7219 90 : CPLAssert(schema->format[1] == 0);
7220 90 : switch (schema->format[0])
7221 : {
7222 8 : case ARROW_LETTER_INT8:
7223 8 : nVal = GetValue<int8_t>(array, iFeature);
7224 8 : break;
7225 8 : case ARROW_LETTER_UINT8:
7226 8 : nVal = GetValue<uint8_t>(array, iFeature);
7227 8 : break;
7228 8 : case ARROW_LETTER_INT16:
7229 8 : nVal = GetValue<int16_t>(array, iFeature);
7230 8 : break;
7231 8 : case ARROW_LETTER_UINT16:
7232 8 : nVal = GetValue<uint16_t>(array, iFeature);
7233 8 : break;
7234 34 : case ARROW_LETTER_INT32:
7235 34 : nVal = GetValue<int32_t>(array, iFeature);
7236 34 : break;
7237 8 : case ARROW_LETTER_UINT32:
7238 8 : nVal = GetValue<uint32_t>(array, iFeature);
7239 8 : break;
7240 8 : case ARROW_LETTER_INT64:
7241 8 : nVal = GetValue<int64_t>(array, iFeature);
7242 8 : break;
7243 8 : case ARROW_LETTER_UINT64:
7244 8 : nVal = GetValue<uint64_t>(array, iFeature);
7245 8 : break;
7246 0 : default:
7247 : // Shouldn't happen given checks in BuildOGRFieldInfo()
7248 0 : CPLAssert(false);
7249 : break;
7250 : }
7251 90 : return nVal;
7252 : }
7253 :
7254 : /************************************************************************/
7255 : /* GetWorkingBufferSize() */
7256 : /************************************************************************/
7257 :
7258 1382830 : static size_t GetWorkingBufferSize(const struct ArrowSchema *schema,
7259 : const struct ArrowArray *array,
7260 : size_t iFeature, int &iArrowIdxInOut,
7261 : const std::vector<FieldInfo> &asFieldInfo)
7262 : {
7263 1382830 : const char *fieldName = schema->name;
7264 1382830 : const char *format = schema->format;
7265 1382830 : const int iArrowIdx = iArrowIdxInOut;
7266 1382830 : if (IsStructure(format))
7267 : {
7268 60426 : if (asFieldInfo[iArrowIdx].eNominalFieldType == OFTDateTime)
7269 : {
7270 0 : ++iArrowIdxInOut;
7271 0 : return 0;
7272 : }
7273 :
7274 60426 : size_t nRet = 0;
7275 1382850 : for (int64_t i = 0; i < array->n_children; ++i)
7276 : {
7277 1322420 : nRet += GetWorkingBufferSize(
7278 1322420 : schema->children[i], array->children[i],
7279 1322420 : iFeature + static_cast<size_t>(array->offset), iArrowIdxInOut,
7280 : asFieldInfo);
7281 : }
7282 60426 : return nRet;
7283 : }
7284 1322400 : ++iArrowIdxInOut;
7285 :
7286 1322400 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7287 122188 : return 0;
7288 :
7289 1200210 : const uint8_t *pabyValidity =
7290 1200210 : static_cast<const uint8_t *>(array->buffers[0]);
7291 1200340 : if (array->null_count != 0 && pabyValidity &&
7292 123 : !TestBit(pabyValidity, static_cast<size_t>(iFeature + array->offset)))
7293 : {
7294 : // empty string
7295 57 : return 0;
7296 : }
7297 :
7298 1200160 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7299 : {
7300 41 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7301 41 : const auto dictArray = array->dictionary;
7302 41 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7303 : {
7304 1 : CPLError(CE_Failure, CPLE_AppDefined,
7305 : "Feature %" PRIu64
7306 : ", field %s: invalid dictionary index: %" PRIu64,
7307 : static_cast<uint64_t>(iFeature), fieldName, nDictIdx);
7308 1 : return 0;
7309 : }
7310 :
7311 40 : array = dictArray;
7312 40 : schema = schema->dictionary;
7313 40 : format = schema->format;
7314 40 : iFeature = static_cast<size_t>(nDictIdx);
7315 : }
7316 :
7317 1200160 : if (IsString(format))
7318 : {
7319 1200150 : const auto *panOffsets =
7320 1200150 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset;
7321 1200150 : return 1 + (panOffsets[iFeature + 1] - panOffsets[iFeature]);
7322 : }
7323 10 : else if (IsLargeString(format))
7324 : {
7325 10 : const auto *panOffsets =
7326 10 : static_cast<const uint64_t *>(array->buffers[1]) + array->offset;
7327 10 : return 1 + static_cast<size_t>(panOffsets[iFeature + 1] -
7328 10 : panOffsets[iFeature]);
7329 : }
7330 0 : return 0;
7331 : }
7332 :
7333 : /************************************************************************/
7334 : /* FillField() */
7335 : /************************************************************************/
7336 :
7337 : template <typename ArrowType, typename OGRType = ArrowType>
7338 462 : inline static void FillField(const struct ArrowArray *array, int iOGRFieldIdx,
7339 : size_t iFeature, OGRFeature &oFeature)
7340 : {
7341 462 : const auto *panValues = static_cast<const ArrowType *>(array->buffers[1]);
7342 462 : oFeature.SetFieldSameTypeUnsafe(
7343 : iOGRFieldIdx,
7344 462 : static_cast<OGRType>(panValues[iFeature + array->offset]));
7345 462 : }
7346 :
7347 : /************************************************************************/
7348 : /* FillFieldString() */
7349 : /************************************************************************/
7350 :
7351 : template <typename OffsetType>
7352 : inline static void
7353 1200160 : FillFieldString(const struct ArrowArray *array, int iOGRFieldIdx,
7354 : size_t iFeature, int iArrowIdx,
7355 : const std::vector<FieldInfo> &asFieldInfo,
7356 : std::string &osWorkingBuffer, OGRFeature &oFeature)
7357 : {
7358 1200160 : const auto *panOffsets =
7359 1200160 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7360 1200160 : const char *pszStr = static_cast<const char *>(array->buffers[2]);
7361 1200160 : const size_t nLen =
7362 1200160 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7363 1200160 : if (asFieldInfo[iArrowIdx].bUseStringOptim)
7364 : {
7365 1200160 : oFeature.SetFieldSameTypeUnsafe(
7366 1200160 : iOGRFieldIdx, &osWorkingBuffer[0] + osWorkingBuffer.size());
7367 1200160 : osWorkingBuffer.append(pszStr + panOffsets[iFeature], nLen);
7368 1200160 : osWorkingBuffer.push_back(0); // append null character
7369 : }
7370 : else
7371 : {
7372 0 : const std::string osTmp(pszStr, nLen);
7373 0 : oFeature.SetField(iOGRFieldIdx, osTmp.c_str());
7374 : }
7375 1200160 : }
7376 :
7377 : /************************************************************************/
7378 : /* FillFieldBinary() */
7379 : /************************************************************************/
7380 :
7381 : template <typename OffsetType>
7382 : inline static bool
7383 60344 : FillFieldBinary(const struct ArrowArray *array, int iOGRFieldIdx,
7384 : size_t iFeature, int iArrowIdx,
7385 : const std::vector<FieldInfo> &asFieldInfo,
7386 : const std::string &osFieldPrefix, const char *pszFieldName,
7387 : OGRFeature &oFeature)
7388 : {
7389 60344 : const auto *panOffsets =
7390 60344 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7391 60344 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]) +
7392 60344 : static_cast<size_t>(panOffsets[iFeature]);
7393 60344 : const size_t nLen =
7394 60344 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7395 60344 : if (asFieldInfo[iArrowIdx].bIsGeomCol)
7396 : {
7397 60316 : size_t nBytesConsumedOut = 0;
7398 :
7399 : // Check if we can reuse the existing geometry, to save dynamic memory
7400 : // allocations.
7401 60316 : if (nLen >= 5 && pabyData[0] == wkbNDR && pabyData[1] <= wkbTriangle &&
7402 60309 : pabyData[2] == 0 && pabyData[3] == 0 && pabyData[4] == 0)
7403 : {
7404 60309 : const auto poExistingGeom = oFeature.GetGeomFieldRef(iOGRFieldIdx);
7405 120570 : if (poExistingGeom &&
7406 60261 : poExistingGeom->getGeometryType() == pabyData[1])
7407 : {
7408 60261 : poExistingGeom->importFromWkb(pabyData, nLen, wkbVariantIso,
7409 : nBytesConsumedOut);
7410 60261 : return true;
7411 : }
7412 : }
7413 :
7414 55 : OGRGeometry *poGeometry = nullptr;
7415 55 : OGRGeometryFactory::createFromWkb(pabyData, nullptr, &poGeometry, nLen,
7416 : wkbVariantIso, nBytesConsumedOut);
7417 55 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, poGeometry);
7418 : }
7419 : else
7420 : {
7421 28 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
7422 : {
7423 0 : CPLError(CE_Failure, CPLE_NotSupported,
7424 : "Content for field %s%s is too large",
7425 : osFieldPrefix.c_str(), pszFieldName);
7426 0 : return false;
7427 : }
7428 28 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(nLen), pabyData);
7429 : }
7430 83 : return true;
7431 : }
7432 :
7433 : /************************************************************************/
7434 : /* FillFeature() */
7435 : /************************************************************************/
7436 :
7437 1322420 : static bool FillFeature(OGRLayer *poLayer, const struct ArrowSchema *schema,
7438 : const struct ArrowArray *array,
7439 : const std::string &osFieldPrefix, size_t iFeature,
7440 : int &iArrowIdxInOut,
7441 : const std::vector<FieldInfo> &asFieldInfo,
7442 : OGRFeature &oFeature, std::string &osWorkingBuffer)
7443 :
7444 : {
7445 1322420 : const char *fieldName = schema->name;
7446 1322420 : const char *format = schema->format;
7447 1322420 : const int iArrowIdx = iArrowIdxInOut;
7448 1322420 : if (IsStructure(format))
7449 : {
7450 19 : if (asFieldInfo[iArrowIdx].eNominalFieldType == OFTDateTime)
7451 : {
7452 0 : ++iArrowIdxInOut;
7453 0 : const int iOGRFieldIdx = asFieldInfo[iArrowIdx].iOGRFieldIdx;
7454 :
7455 0 : if (array->null_count != 0)
7456 : {
7457 0 : const uint8_t *pabyValidity =
7458 0 : static_cast<const uint8_t *>(array->buffers[0]);
7459 0 : if (pabyValidity &&
7460 0 : !TestBit(pabyValidity,
7461 0 : static_cast<size_t>(iFeature + array->offset)))
7462 : {
7463 0 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7464 0 : OGR_RawField_SetNull(psField);
7465 : }
7466 : }
7467 :
7468 0 : const auto *panTimestamps =
7469 0 : static_cast<const int64_t *>(array->children[0]->buffers[1]);
7470 0 : int64_t nTimestamp = panTimestamps[iFeature + array->offset];
7471 0 : const auto *panOffsetsMinutes =
7472 0 : static_cast<const int16_t *>(array->children[1]->buffers[1]);
7473 0 : const int nOffsetMinute =
7474 0 : panOffsetsMinutes[iFeature + array->offset];
7475 0 : const int nTZFlag =
7476 0 : nOffsetMinute >= -14 * 60 && nOffsetMinute <= 14 * 60
7477 0 : ? OGR_TZFLAG_UTC + nOffsetMinute / 15
7478 : : OGR_TZFLAG_UTC;
7479 :
7480 0 : const char *formatTS = schema->children[0]->format;
7481 : const int nInvFactorToSecond =
7482 0 : IsTimestampSeconds(formatTS) ? 1
7483 0 : : IsTimestampMilliseconds(formatTS) ? 1000
7484 0 : : IsTimestampMicroseconds(formatTS) ? 1000 * 1000
7485 0 : : IsTimestampNanoseconds(formatTS) ? 1000 * 1000 * 1000
7486 0 : : 1;
7487 0 : double floatingPart = 0;
7488 0 : if (nInvFactorToSecond)
7489 : {
7490 0 : floatingPart = (nTimestamp % nInvFactorToSecond) /
7491 0 : double(nInvFactorToSecond);
7492 0 : nTimestamp /= nInvFactorToSecond;
7493 : }
7494 0 : nTimestamp += (nTZFlag - OGR_TZFLAG_UTC) * 15 * 60;
7495 : struct tm dt;
7496 0 : CPLUnixTimeToYMDHMS(nTimestamp, &dt);
7497 0 : oFeature.SetField(iOGRFieldIdx, dt.tm_year + 1900, dt.tm_mon + 1,
7498 : dt.tm_mday, dt.tm_hour, dt.tm_min,
7499 0 : static_cast<float>(dt.tm_sec + floatingPart),
7500 : nTZFlag);
7501 : }
7502 : else
7503 : {
7504 19 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
7505 78 : for (int64_t i = 0; i < array->n_children; ++i)
7506 : {
7507 59 : if (!FillFeature(poLayer, schema->children[i],
7508 59 : array->children[i], osNewPrefix,
7509 59 : iFeature + static_cast<size_t>(array->offset),
7510 : iArrowIdxInOut, asFieldInfo, oFeature,
7511 : osWorkingBuffer))
7512 0 : return false;
7513 : }
7514 : }
7515 19 : return true;
7516 : }
7517 1322400 : ++iArrowIdxInOut;
7518 1322400 : const int iOGRFieldIdx = asFieldInfo[iArrowIdx].iOGRFieldIdx;
7519 :
7520 1322400 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7521 : {
7522 62 : format = schema->dictionary->format;
7523 : }
7524 :
7525 1322400 : if (array->null_count != 0)
7526 : {
7527 1011 : const uint8_t *pabyValidity =
7528 1011 : static_cast<const uint8_t *>(array->buffers[0]);
7529 1973 : if (pabyValidity &&
7530 962 : !TestBit(pabyValidity,
7531 962 : static_cast<size_t>(iFeature + array->offset)))
7532 : {
7533 298 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7534 5 : oFeature.SetFID(OGRNullFID);
7535 293 : else if (asFieldInfo[iArrowIdx].bIsGeomCol)
7536 70 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, nullptr);
7537 223 : else if (asFieldInfo[iArrowIdx].eSetFeatureFieldType == OFTString)
7538 : {
7539 120 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7540 120 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7541 : {
7542 63 : if (IsValidField(psField))
7543 : {
7544 51 : CPLFree(psField->String);
7545 51 : OGR_RawField_SetNull(psField);
7546 : }
7547 : }
7548 : else
7549 : {
7550 57 : OGR_RawField_SetNull(psField);
7551 : }
7552 : }
7553 : else
7554 : {
7555 103 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7556 103 : switch (asFieldInfo[iArrowIdx].eSetFeatureFieldType)
7557 : {
7558 47 : case OFTRealList:
7559 : case OFTIntegerList:
7560 : case OFTInteger64List:
7561 47 : if (IsValidField(psField))
7562 47 : CPLFree(psField->IntegerList.paList);
7563 47 : break;
7564 :
7565 7 : case OFTStringList:
7566 7 : if (IsValidField(psField))
7567 7 : CSLDestroy(psField->StringList.paList);
7568 7 : break;
7569 :
7570 1 : case OFTBinary:
7571 1 : if (IsValidField(psField))
7572 1 : CPLFree(psField->Binary.paData);
7573 1 : break;
7574 :
7575 48 : default:
7576 48 : break;
7577 : }
7578 103 : OGR_RawField_SetNull(psField);
7579 : }
7580 298 : return true;
7581 : }
7582 : }
7583 :
7584 1322100 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7585 : {
7586 49 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7587 49 : auto dictArray = array->dictionary;
7588 49 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7589 : {
7590 2 : CPLError(CE_Failure, CPLE_AppDefined,
7591 : "Feature %" PRIu64
7592 : ", field %s: invalid dictionary index: %" PRIu64,
7593 : static_cast<uint64_t>(iFeature),
7594 4 : (osFieldPrefix + fieldName).c_str(), nDictIdx);
7595 2 : return false;
7596 : }
7597 47 : array = dictArray;
7598 47 : schema = schema->dictionary;
7599 47 : iFeature = static_cast<size_t>(nDictIdx);
7600 : }
7601 :
7602 1322100 : if (IsBoolean(format))
7603 : {
7604 12 : const uint8_t *pabyValues =
7605 12 : static_cast<const uint8_t *>(array->buffers[1]);
7606 12 : oFeature.SetFieldSameTypeUnsafe(
7607 : iOGRFieldIdx,
7608 12 : TestBit(pabyValues, static_cast<size_t>(iFeature + array->offset))
7609 : ? 1
7610 : : 0);
7611 12 : return true;
7612 : }
7613 1322090 : else if (IsInt8(format))
7614 : {
7615 10 : FillField<int8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7616 10 : return true;
7617 : }
7618 1322080 : else if (IsUInt8(format))
7619 : {
7620 10 : FillField<uint8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7621 10 : return true;
7622 : }
7623 1322070 : else if (IsInt16(format))
7624 : {
7625 12 : FillField<int16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7626 12 : return true;
7627 : }
7628 1322060 : else if (IsUInt16(format))
7629 : {
7630 10 : FillField<uint16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7631 10 : return true;
7632 : }
7633 1322050 : else if (IsInt32(format))
7634 : {
7635 250 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7636 : {
7637 2 : const auto *panValues =
7638 2 : static_cast<const int32_t *>(array->buffers[1]);
7639 2 : oFeature.SetFID(panValues[iFeature + array->offset]);
7640 : }
7641 : else
7642 : {
7643 248 : FillField<int32_t>(array, iOGRFieldIdx, iFeature, oFeature);
7644 : }
7645 250 : return true;
7646 : }
7647 1321800 : else if (IsUInt32(format))
7648 : {
7649 4 : FillField<uint32_t, GIntBig>(array, iOGRFieldIdx, iFeature, oFeature);
7650 4 : return true;
7651 : }
7652 1321790 : else if (IsInt64(format))
7653 : {
7654 60392 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7655 : {
7656 60304 : const auto *panValues =
7657 60304 : static_cast<const int64_t *>(array->buffers[1]);
7658 60304 : oFeature.SetFID(panValues[iFeature + array->offset]);
7659 : }
7660 : else
7661 : {
7662 88 : FillField<int64_t, GIntBig>(array, iOGRFieldIdx, iFeature,
7663 : oFeature);
7664 : }
7665 60392 : return true;
7666 : }
7667 1261400 : else if (IsUInt64(format))
7668 : {
7669 10 : FillField<uint64_t, double>(array, iOGRFieldIdx, iFeature, oFeature);
7670 10 : return true;
7671 : }
7672 1261390 : else if (IsFloat32(format))
7673 : {
7674 12 : FillField<float, double>(array, iOGRFieldIdx, iFeature, oFeature);
7675 12 : return true;
7676 : }
7677 1261380 : else if (IsFloat64(format))
7678 : {
7679 58 : FillField<double>(array, iOGRFieldIdx, iFeature, oFeature);
7680 58 : return true;
7681 : }
7682 1261320 : else if (IsString(format))
7683 : {
7684 1200150 : FillFieldString<uint32_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7685 : asFieldInfo, osWorkingBuffer, oFeature);
7686 1200150 : return true;
7687 : }
7688 61176 : else if (IsLargeString(format))
7689 : {
7690 10 : FillFieldString<uint64_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7691 : asFieldInfo, osWorkingBuffer, oFeature);
7692 10 : return true;
7693 : }
7694 61166 : else if (IsBinary(format))
7695 : {
7696 60328 : return FillFieldBinary<uint32_t>(array, iOGRFieldIdx, iFeature,
7697 : iArrowIdx, asFieldInfo, osFieldPrefix,
7698 60328 : fieldName, oFeature);
7699 : }
7700 838 : else if (IsLargeBinary(format))
7701 : {
7702 16 : return FillFieldBinary<uint64_t>(array, iOGRFieldIdx, iFeature,
7703 : iArrowIdx, asFieldInfo, osFieldPrefix,
7704 16 : fieldName, oFeature);
7705 : }
7706 822 : else if (asFieldInfo[iArrowIdx].nPrecision > 0)
7707 : {
7708 : // fits on a int64
7709 46 : CPLAssert(asFieldInfo[iArrowIdx].nPrecision <= 19);
7710 : // either 128 or 256 bits
7711 46 : CPLAssert((asFieldInfo[iArrowIdx].nWidthInBytes % 8) == 0);
7712 46 : const int nWidthIn64BitWord = asFieldInfo[iArrowIdx].nWidthInBytes / 8;
7713 :
7714 46 : if (IsList(format))
7715 : {
7716 16 : const auto panOffsets =
7717 16 : static_cast<const uint32_t *>(array->buffers[1]) +
7718 16 : array->offset;
7719 16 : const auto childArray = array->children[0];
7720 16 : std::vector<double> aValues;
7721 33 : for (auto i = panOffsets[iFeature]; i < panOffsets[iFeature + 1];
7722 : ++i)
7723 : {
7724 17 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7725 17 : asFieldInfo[iArrowIdx].nScale,
7726 : i));
7727 : }
7728 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7729 16 : aValues.data());
7730 16 : return true;
7731 : }
7732 30 : else if (IsLargeList(format))
7733 : {
7734 4 : const auto panOffsets =
7735 4 : static_cast<const uint64_t *>(array->buffers[1]) +
7736 4 : array->offset;
7737 4 : const auto childArray = array->children[0];
7738 4 : std::vector<double> aValues;
7739 4 : for (auto i = static_cast<size_t>(panOffsets[iFeature]);
7740 9 : i < static_cast<size_t>(panOffsets[iFeature + 1]); ++i)
7741 : {
7742 5 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7743 5 : asFieldInfo[iArrowIdx].nScale,
7744 : i));
7745 : }
7746 4 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7747 4 : aValues.data());
7748 4 : return true;
7749 : }
7750 26 : else if (IsFixedSizeList(format))
7751 : {
7752 4 : const int nVals = GetFixedSizeList(format);
7753 4 : const auto childArray = array->children[0];
7754 4 : std::vector<double> aValues;
7755 12 : for (int i = 0; i < nVals; ++i)
7756 : {
7757 8 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7758 8 : asFieldInfo[iArrowIdx].nScale,
7759 8 : iFeature * nVals + i));
7760 : }
7761 4 : oFeature.SetField(iOGRFieldIdx, nVals, aValues.data());
7762 4 : return true;
7763 : }
7764 :
7765 22 : CPLAssert(format[0] == ARROW_LETTER_DECIMAL);
7766 :
7767 22 : oFeature.SetFieldSameTypeUnsafe(
7768 : iOGRFieldIdx,
7769 : GetValueDecimal(array, nWidthIn64BitWord,
7770 22 : asFieldInfo[iArrowIdx].nScale, iFeature));
7771 22 : return true;
7772 : }
7773 776 : else if (SetFieldForOtherFormats(
7774 : oFeature, iOGRFieldIdx,
7775 776 : static_cast<size_t>(iFeature + array->offset), schema, array))
7776 : {
7777 776 : return true;
7778 : }
7779 :
7780 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
7781 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
7782 0 : fieldName + " is not supported.")
7783 : .c_str());
7784 0 : return false;
7785 : }
7786 :
7787 : /************************************************************************/
7788 : /* OGRLayer::WriteArrowBatch() */
7789 : /************************************************************************/
7790 :
7791 : // clang-format off
7792 : /** Writes a batch of rows from an ArrowArray.
7793 : *
7794 : * This is semantically close to calling CreateFeature() with multiple features
7795 : * at once.
7796 : *
7797 : * The ArrowArray must be of type struct (format=+s), and its children generally
7798 : * map to an OGR attribute or geometry field (unless they are struct themselves).
7799 : *
7800 : * Method IsArrowSchemaSupported() can be called to determine if the schema
7801 : * will be supported by WriteArrowBatch().
7802 : *
7803 : * OGR fields for the corresponding children arrays must exist and be of a
7804 : * compatible type. For attribute fields, they should generally be created with
7805 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
7806 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
7807 : * they should be created either implicitly at CreateLayer() time
7808 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
7809 : *
7810 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
7811 : * implementation of WriteArrowBatch() for scenarios that involve appending to
7812 : * an already existing output layer when the input Arrow field type and the
7813 : * OGR layer field type are 32/64-bi integers or real number, but do not match
7814 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
7815 : * can be used to control the behavior in case of lossy conversion.
7816 : *
7817 : * Arrays for geometry columns should be of binary or large binary type and
7818 : * contain WKB geometry.
7819 : *
7820 : * Note that the passed array may be set to a released state
7821 : * (array->release==NULL) after this call (not by the base implementation,
7822 : * but in specialized ones such as Parquet or Arrow for example)
7823 : *
7824 : * Supported options of the base implementation are:
7825 : * <ul>
7826 : * <li>FID=name. Name of the FID column in the array. If not provided,
7827 : * GetFIDColumn() is used to determine it. The special name
7828 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
7829 : * GetFIDColumn() are set.
7830 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
7831 : * On input, values of the FID column are used to create the feature.
7832 : * On output, the values of the FID column may be set with the FID of the
7833 : * created feature (if the array is not released).
7834 : * </li>
7835 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
7836 : * input FID is not preserved in the output layer. The default is NOTHING.
7837 : * Setting it to ERROR will cause the function to error out. Setting it
7838 : * to WARNING will cause the function to emit a warning but continue its
7839 : * processing.
7840 : * </li>
7841 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
7842 : * Action to perform when the input field value is not preserved in the
7843 : * output layer.
7844 : * The default is WARNING, which will cause the function to emit a warning
7845 : * but continue its processing.
7846 : * Setting it to ERROR will cause the function to error out if a lossy
7847 : * conversion is detected.
7848 : * </li>
7849 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
7850 : * GetGeometryColumn() is used. The special name
7851 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
7852 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
7853 : * Geometry columns are also identified if they have
7854 : * ARROW:extension:name=ogc.wkb as a field metadata.
7855 : * The corresponding ArrowArray must be of type binary (z) or large
7856 : * binary (Z).
7857 : * </li>
7858 : * </ul>
7859 : *
7860 : * The following example demonstrates how to copy a layer from one format to
7861 : * another one (assuming it has at most a single geometry column):
7862 : \code{.py}
7863 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
7864 : stream = src_lyr.GetArrowStream()
7865 : schema = stream.GetSchema()
7866 :
7867 : # If the source layer has a FID column and the output driver supports
7868 : # a FID layer creation option, set it to the source FID column name.
7869 : if src_lyr.GetFIDColumn():
7870 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
7871 : "DS_LAYER_CREATIONOPTIONLIST"
7872 : )
7873 : if creationOptions and '"FID"' in creationOptions:
7874 : lcos["FID"] = src_lyr.GetFIDColumn()
7875 :
7876 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
7877 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
7878 : out_lyr = out_ds.CreateLayer(
7879 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
7880 : )
7881 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
7882 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
7883 : else:
7884 : out_lyr = out_ds.CreateLayer(
7885 : src_lyr.GetName(),
7886 : geom_type=src_lyr.GetGeomType(),
7887 : srs=src_lyr.GetSpatialRef(),
7888 : options=lcos,
7889 : )
7890 :
7891 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
7892 : assert success, error_msg
7893 :
7894 : src_geom_field_names = [
7895 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
7896 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
7897 : ]
7898 : for i in range(schema.GetChildrenCount()):
7899 : # GetArrowStream() may return "OGC_FID" for an unnamed source FID
7900 : # column and "wkb_geometry" for an unnamed source geometry column.
7901 : # Also test GetFIDColumn() and src_geom_field_names if they are
7902 : # named.
7903 : if (
7904 : schema.GetChild(i).GetName()
7905 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
7906 : and schema.GetChild(i).GetName() not in src_geom_field_names
7907 : ):
7908 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
7909 :
7910 : write_options = []
7911 : if src_lyr.GetFIDColumn():
7912 : write_options.append("FID=" + src_lyr.GetFIDColumn())
7913 : if (
7914 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
7915 : and src_lyr.GetGeometryColumn()
7916 : ):
7917 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
7918 :
7919 : while True:
7920 : array = stream.GetNextRecordBatch()
7921 : if array is None:
7922 : break
7923 : out_lyr.WriteArrowBatch(schema, array, write_options)
7924 : \endcode
7925 : *
7926 : * This method and CreateFeature() are mutually exclusive in the same session.
7927 : *
7928 : * This method is the same as the C function OGR_L_WriteArrowBatch().
7929 : *
7930 : * @param schema Schema of array
7931 : * @param array Array of type struct. It may be released (array->release==NULL)
7932 : * after calling this method.
7933 : * @param papszOptions Options. Null terminated list, or nullptr.
7934 : * @return true in case of success
7935 : * @since 3.8
7936 : */
7937 : // clang-format on
7938 :
7939 88 : bool OGRLayer::WriteArrowBatch(const struct ArrowSchema *schema,
7940 : struct ArrowArray *array,
7941 : CSLConstList papszOptions)
7942 : {
7943 88 : const char *format = schema->format;
7944 88 : if (!IsStructure(format))
7945 : {
7946 0 : CPLError(CE_Failure, CPLE_AppDefined,
7947 : "WriteArrowBatch() should be called on a schema that is a "
7948 : "struct of fields");
7949 0 : return false;
7950 : }
7951 :
7952 88 : if (schema->n_children != array->n_children)
7953 : {
7954 0 : CPLError(CE_Failure, CPLE_AppDefined,
7955 : "WriteArrowBatch(): schema->n_children (%d) != "
7956 : "array->n_children (%d)",
7957 0 : int(schema->n_children), int(array->n_children));
7958 0 : return false;
7959 : }
7960 :
7961 176 : CPLStringList aosNativeTypes;
7962 88 : auto poDS = const_cast<OGRLayer *>(this)->GetDataset();
7963 88 : if (poDS)
7964 : {
7965 88 : auto poDriver = poDS->GetDriver();
7966 88 : if (poDriver)
7967 : {
7968 : const char *pszMetadataItem =
7969 88 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
7970 88 : if (pszMetadataItem)
7971 88 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
7972 : }
7973 : }
7974 :
7975 176 : std::vector<FieldInfo> asFieldInfo;
7976 88 : auto poLayerDefn = GetLayerDefn();
7977 : const char *pszFIDName =
7978 88 : CSLFetchNameValueDef(papszOptions, "FID", GetFIDColumn());
7979 88 : if (!pszFIDName || pszFIDName[0] == 0)
7980 60 : pszFIDName = DEFAULT_ARROW_FID_NAME;
7981 : const bool bErrorIfFIDNotPreserved =
7982 88 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
7983 : "ERROR");
7984 : const bool bWarningIfFIDNotPreserved =
7985 88 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
7986 : "WARNING");
7987 : const bool bErrorIfFieldNotPreserved =
7988 88 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FIELD_NOT_PRESERVED", ""),
7989 : "ERROR");
7990 88 : const char *pszGeomFieldName = CSLFetchNameValueDef(
7991 88 : papszOptions, "GEOMETRY_NAME", GetGeometryColumn());
7992 88 : if (!pszGeomFieldName || pszGeomFieldName[0] == 0)
7993 59 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
7994 88 : const struct ArrowSchema *schemaFIDColumn = nullptr;
7995 88 : struct ArrowArray *arrayFIDColumn = nullptr;
7996 88 : bool bFallbackTypesUsed = false;
7997 836 : for (int64_t i = 0; i < schema->n_children; ++i)
7998 : {
7999 749 : if (!BuildOGRFieldInfo(schema->children[i], array->children[i],
8000 749 : poLayerDefn, std::string(), aosNativeTypes,
8001 : bFallbackTypesUsed, asFieldInfo, pszFIDName,
8002 : pszGeomFieldName, this,
8003 749 : m_poPrivate->m_oMapArrowFieldNameToOGRFieldName,
8004 : schemaFIDColumn, arrayFIDColumn))
8005 : {
8006 1 : return false;
8007 : }
8008 : }
8009 :
8010 174 : std::map<int, int> oMapOGRFieldIndexToFieldInfoIndex;
8011 174 : std::vector<bool> abUseStringOptim(poLayerDefn->GetFieldCount(), false);
8012 855 : for (int i = 0; i < static_cast<int>(asFieldInfo.size()); ++i)
8013 : {
8014 768 : if (asFieldInfo[i].iOGRFieldIdx >= 0 && !asFieldInfo[i].bIsGeomCol)
8015 : {
8016 654 : CPLAssert(oMapOGRFieldIndexToFieldInfoIndex.find(
8017 : asFieldInfo[i].iOGRFieldIdx) ==
8018 : oMapOGRFieldIndexToFieldInfoIndex.end());
8019 654 : oMapOGRFieldIndexToFieldInfoIndex[asFieldInfo[i].iOGRFieldIdx] = i;
8020 1308 : abUseStringOptim[asFieldInfo[i].iOGRFieldIdx] =
8021 1308 : asFieldInfo[i].bUseStringOptim;
8022 : }
8023 : }
8024 :
8025 174 : OGRFeatureDefn oLayerDefnTmp(poLayerDefn->GetName());
8026 :
8027 : struct LayerDefnTmpRefReleaser
8028 : {
8029 : OGRFeatureDefn &m_oDefn;
8030 :
8031 87 : explicit LayerDefnTmpRefReleaser(OGRFeatureDefn &oDefn) : m_oDefn(oDefn)
8032 : {
8033 87 : m_oDefn.Reference();
8034 87 : }
8035 :
8036 87 : ~LayerDefnTmpRefReleaser()
8037 87 : {
8038 87 : m_oDefn.Dereference();
8039 87 : }
8040 : };
8041 :
8042 174 : LayerDefnTmpRefReleaser oLayerDefnTmpRefReleaser(oLayerDefnTmp);
8043 :
8044 174 : std::vector<int> anIdentityFieldMap;
8045 87 : if (bFallbackTypesUsed)
8046 : {
8047 30 : oLayerDefnTmp.SetGeomType(wkbNone);
8048 101 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
8049 : {
8050 71 : anIdentityFieldMap.push_back(i);
8051 71 : const auto poSrcFieldDefn = poLayerDefn->GetFieldDefn(i);
8052 71 : const auto oIter = oMapOGRFieldIndexToFieldInfoIndex.find(i);
8053 : OGRFieldDefn oFieldDefn(
8054 : poSrcFieldDefn->GetNameRef(),
8055 71 : oIter == oMapOGRFieldIndexToFieldInfoIndex.end()
8056 1 : ? poSrcFieldDefn->GetType()
8057 143 : : asFieldInfo[oIter->second].eNominalFieldType);
8058 71 : if (oIter != oMapOGRFieldIndexToFieldInfoIndex.end())
8059 70 : asFieldInfo[oIter->second].eSetFeatureFieldType =
8060 70 : asFieldInfo[oIter->second].eNominalFieldType;
8061 71 : oLayerDefnTmp.AddFieldDefn(&oFieldDefn);
8062 : }
8063 59 : for (int i = 0; i < poLayerDefn->GetGeomFieldCount(); ++i)
8064 : {
8065 29 : oLayerDefnTmp.AddGeomFieldDefn(poLayerDefn->GetGeomFieldDefn(i));
8066 : }
8067 : }
8068 : else
8069 : {
8070 723 : for (auto &sFieldInfo : asFieldInfo)
8071 666 : sFieldInfo.eSetFeatureFieldType = sFieldInfo.eTargetFieldType;
8072 : }
8073 :
8074 : struct FeatureCleaner
8075 : {
8076 : OGRFeature &m_oFeature;
8077 : const std::vector<bool> &m_abUseStringOptim;
8078 :
8079 87 : explicit FeatureCleaner(OGRFeature &oFeature,
8080 : const std::vector<bool> &abUseStringOptim)
8081 87 : : m_oFeature(oFeature), m_abUseStringOptim(abUseStringOptim)
8082 : {
8083 87 : }
8084 :
8085 : // As we set a value that can't be CPLFree()'d in the .String member
8086 : // of string fields, we must take care of manually unsetting it before
8087 : // the destructor of OGRFeature gets called.
8088 87 : ~FeatureCleaner()
8089 87 : {
8090 87 : const auto poLayerDefn = m_oFeature.GetDefnRef();
8091 87 : const int nFieldCount = poLayerDefn->GetFieldCount();
8092 745 : for (int i = 0; i < nFieldCount; ++i)
8093 : {
8094 658 : if (m_abUseStringOptim[i])
8095 : {
8096 130 : if (m_oFeature.IsFieldSetAndNotNullUnsafe(i))
8097 102 : m_oFeature.SetFieldSameTypeUnsafe(
8098 : i, static_cast<char *>(nullptr));
8099 : }
8100 : }
8101 87 : }
8102 : };
8103 :
8104 174 : OGRFeature oFeature(bFallbackTypesUsed ? &oLayerDefnTmp : poLayerDefn);
8105 174 : FeatureCleaner oCleaner(oFeature, abUseStringOptim);
8106 174 : OGRFeature oFeatureTarget(poLayerDefn);
8107 87 : OGRFeature *const poFeatureTarget =
8108 87 : bFallbackTypesUsed ? &oFeatureTarget : &oFeature;
8109 :
8110 : // We accumulate the content of all strings in osWorkingBuffer to avoid
8111 : // a few dynamic memory allocations
8112 174 : std::string osWorkingBuffer;
8113 :
8114 : bool bTransactionOK;
8115 : {
8116 87 : CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler);
8117 87 : bTransactionOK = StartTransaction() == OGRERR_NONE;
8118 : }
8119 :
8120 174 : const std::string emptyString;
8121 87 : int64_t fidNullCount = 0;
8122 60484 : for (size_t iFeature = 0; iFeature < static_cast<size_t>(array->length);
8123 : ++iFeature)
8124 : {
8125 60407 : oFeature.SetFID(OGRNullFID);
8126 :
8127 60407 : int iArrowIdx = 0;
8128 60407 : const size_t nWorkingBufferSize = GetWorkingBufferSize(
8129 : schema, array, iFeature, iArrowIdx, asFieldInfo);
8130 60407 : osWorkingBuffer.clear();
8131 60407 : osWorkingBuffer.reserve(nWorkingBufferSize);
8132 : #ifdef DEBUG
8133 60407 : const char *pszWorkingBuffer = osWorkingBuffer.c_str();
8134 60407 : CPL_IGNORE_RET_VAL(pszWorkingBuffer);
8135 : #endif
8136 60407 : iArrowIdx = 0;
8137 1382770 : for (int64_t i = 0; i < schema->n_children; ++i)
8138 : {
8139 1322360 : if (!FillFeature(this, schema->children[i], array->children[i],
8140 : emptyString, iFeature, iArrowIdx, asFieldInfo,
8141 : oFeature, osWorkingBuffer))
8142 : {
8143 2 : if (bTransactionOK)
8144 2 : RollbackTransaction();
8145 10 : return false;
8146 : }
8147 : }
8148 : #ifdef DEBUG
8149 : // Check that the buffer didn't get reallocated
8150 60405 : CPLAssert(pszWorkingBuffer == osWorkingBuffer.c_str());
8151 60405 : CPLAssert(osWorkingBuffer.size() == nWorkingBufferSize);
8152 : #endif
8153 :
8154 60405 : if (bFallbackTypesUsed)
8155 : {
8156 48 : oFeatureTarget.SetFrom(&oFeature, anIdentityFieldMap.data(),
8157 : /*bForgiving=*/true,
8158 : /*bUseISO8601ForDateTimeAsString=*/true);
8159 48 : oFeatureTarget.SetFID(oFeature.GetFID());
8160 :
8161 48 : if (bErrorIfFieldNotPreserved)
8162 : {
8163 26 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
8164 : {
8165 16 : if (!oFeature.IsFieldSetAndNotNullUnsafe(i))
8166 : {
8167 4 : continue;
8168 : }
8169 12 : bool bLossyConversion = false;
8170 : const auto eSrcType =
8171 12 : oLayerDefnTmp.GetFieldDefnUnsafe(i)->GetType();
8172 : const auto eDstType =
8173 12 : poLayerDefn->GetFieldDefnUnsafe(i)->GetType();
8174 :
8175 : const auto IsDoubleCastToInt64EqualTInt64 =
8176 2 : [](double dfVal, int64_t nOtherVal)
8177 : {
8178 : // Values in the range [INT64_MAX - 1023, INT64_MAX - 1]
8179 : // get converted to a double that once cast to int64_t
8180 : // is INT64_MAX + 1, hence the strict < comparison
8181 : return dfVal >=
8182 2 : static_cast<double>(
8183 2 : std::numeric_limits<int64_t>::min()) &&
8184 : dfVal <
8185 2 : static_cast<double>(
8186 4 : std::numeric_limits<int64_t>::max()) &&
8187 3 : static_cast<int64_t>(dfVal) == nOtherVal;
8188 : };
8189 :
8190 14 : if (eSrcType == OFTInteger64 && eDstType == OFTInteger &&
8191 2 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
8192 2 : oFeature.GetFieldAsInteger64Unsafe(i))
8193 : {
8194 1 : bLossyConversion = true;
8195 : }
8196 14 : else if (eSrcType == OFTReal && eDstType == OFTInteger &&
8197 3 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
8198 3 : oFeature.GetFieldAsDoubleUnsafe(i))
8199 : {
8200 2 : bLossyConversion = true;
8201 : }
8202 12 : else if (eSrcType == OFTReal && eDstType == OFTInteger64 &&
8203 3 : static_cast<double>(
8204 3 : oFeatureTarget.GetFieldAsInteger64Unsafe(i)) !=
8205 3 : oFeature.GetFieldAsDoubleUnsafe(i))
8206 : {
8207 2 : bLossyConversion = true;
8208 : }
8209 9 : else if (eSrcType == OFTInteger64 && eDstType == OFTReal &&
8210 2 : !IsDoubleCastToInt64EqualTInt64(
8211 : oFeatureTarget.GetFieldAsDoubleUnsafe(i),
8212 2 : oFeature.GetFieldAsInteger64Unsafe(i)))
8213 : {
8214 1 : bLossyConversion = true;
8215 : }
8216 12 : if (bLossyConversion)
8217 : {
8218 6 : CPLError(CE_Failure, CPLE_AppDefined,
8219 : "For feature " CPL_FRMT_GIB
8220 : ", value of field %s cannot not preserved",
8221 : oFeatureTarget.GetFID(),
8222 : oLayerDefnTmp.GetFieldDefn(i)->GetNameRef());
8223 6 : if (bTransactionOK)
8224 6 : RollbackTransaction();
8225 6 : return false;
8226 : }
8227 : }
8228 : }
8229 : }
8230 :
8231 60399 : const auto nInputFID = poFeatureTarget->GetFID();
8232 60399 : if (CreateFeature(poFeatureTarget) != OGRERR_NONE)
8233 : {
8234 1 : if (bTransactionOK)
8235 1 : RollbackTransaction();
8236 1 : return false;
8237 : }
8238 60398 : if (nInputFID != OGRNullFID)
8239 : {
8240 120587 : if (bWarningIfFIDNotPreserved &&
8241 : // cppcheck-suppress knownConditionTrueFalse
8242 60282 : poFeatureTarget->GetFID() != nInputFID)
8243 : {
8244 2 : CPLError(CE_Warning, CPLE_AppDefined,
8245 : "Feature id " CPL_FRMT_GIB " not preserved",
8246 : nInputFID);
8247 : }
8248 60304 : else if (bErrorIfFIDNotPreserved &&
8249 : // cppcheck-suppress knownConditionTrueFalse
8250 1 : poFeatureTarget->GetFID() != nInputFID)
8251 : {
8252 1 : CPLError(CE_Failure, CPLE_AppDefined,
8253 : "Feature id " CPL_FRMT_GIB " not preserved",
8254 : nInputFID);
8255 1 : if (bTransactionOK)
8256 1 : RollbackTransaction();
8257 1 : return false;
8258 : }
8259 : }
8260 :
8261 60397 : if (arrayFIDColumn)
8262 : {
8263 60309 : uint8_t *pabyValidity = static_cast<uint8_t *>(
8264 60309 : const_cast<void *>(arrayFIDColumn->buffers[0]));
8265 60309 : if (IsInt32(schemaFIDColumn->format))
8266 : {
8267 6 : auto *panValues = static_cast<int32_t *>(
8268 6 : const_cast<void *>(arrayFIDColumn->buffers[1]));
8269 6 : if (poFeatureTarget->GetFID() >
8270 6 : std::numeric_limits<int32_t>::max())
8271 : {
8272 0 : if (pabyValidity)
8273 : {
8274 0 : ++fidNullCount;
8275 0 : UnsetBit(pabyValidity,
8276 0 : static_cast<size_t>(iFeature +
8277 0 : arrayFIDColumn->offset));
8278 : }
8279 0 : CPLError(CE_Warning, CPLE_AppDefined,
8280 : "FID " CPL_FRMT_GIB
8281 : " cannot be stored in FID array of type int32",
8282 : poFeatureTarget->GetFID());
8283 : }
8284 : else
8285 : {
8286 6 : if (pabyValidity)
8287 : {
8288 5 : SetBit(pabyValidity,
8289 5 : static_cast<size_t>(iFeature +
8290 5 : arrayFIDColumn->offset));
8291 : }
8292 6 : panValues[iFeature + arrayFIDColumn->offset] =
8293 6 : static_cast<int32_t>(poFeatureTarget->GetFID());
8294 : }
8295 : }
8296 60303 : else if (IsInt64(schemaFIDColumn->format))
8297 : {
8298 60303 : if (pabyValidity)
8299 : {
8300 0 : SetBit(
8301 : pabyValidity,
8302 0 : static_cast<size_t>(iFeature + arrayFIDColumn->offset));
8303 : }
8304 60303 : auto *panValues = static_cast<int64_t *>(
8305 60303 : const_cast<void *>(arrayFIDColumn->buffers[1]));
8306 60303 : panValues[iFeature + arrayFIDColumn->offset] =
8307 60303 : poFeatureTarget->GetFID();
8308 : }
8309 : else
8310 : {
8311 0 : CPLAssert(false);
8312 : }
8313 : }
8314 : }
8315 77 : if (arrayFIDColumn && arrayFIDColumn->buffers[0])
8316 : {
8317 1 : arrayFIDColumn->null_count = fidNullCount;
8318 : }
8319 :
8320 77 : bool bRet = true;
8321 77 : if (bTransactionOK)
8322 66 : bRet = CommitTransaction() == OGRERR_NONE;
8323 :
8324 77 : return bRet;
8325 : }
8326 :
8327 : /************************************************************************/
8328 : /* OGR_L_WriteArrowBatch() */
8329 : /************************************************************************/
8330 :
8331 : // clang-format off
8332 : /** Writes a batch of rows from an ArrowArray.
8333 : *
8334 : * This is semantically close to calling CreateFeature() with multiple features
8335 : * at once.
8336 : *
8337 : * The ArrowArray must be of type struct (format=+s), and its children generally
8338 : * map to a OGR attribute or geometry field (unless they are struct themselves).
8339 : *
8340 : * Method IsArrowSchemaSupported() can be called to determine if the schema
8341 : * will be supported by WriteArrowBatch().
8342 : *
8343 : * OGR fields for the corresponding children arrays must exist and be of a
8344 : * compatible type. For attribute fields, they should generally be created with
8345 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
8346 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
8347 : * they should be created either implicitly at CreateLayer() type
8348 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
8349 : *
8350 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
8351 : * implementation of WriteArrowBatch() for scenarios that involve appending to
8352 : * an already existing output layer when the input Arrow field type and the
8353 : * OGR layer field type are 32/64-bit integers or real numbers, but do not match
8354 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
8355 : * can be used to control the behavior in case of lossy conversion.
8356 : *
8357 : * Arrays for geometry columns should be of binary or large binary type and
8358 : * contain WKB geometry.
8359 : *
8360 : * Note that the passed array may be set to a released state
8361 : * (array->release==NULL) after this call (not by the base implementation,
8362 : * but in specialized ones such as Parquet or Arrow for example)
8363 : *
8364 : * Supported options of the base implementation are:
8365 : * <ul>
8366 : * <li>FID=name. Name of the FID column in the array. If not provided,
8367 : * GetFIDColumn() is used to determine it. The special name
8368 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
8369 : * GetFIDColumn() are set.
8370 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
8371 : * On input, values of the FID column are used to create the feature.
8372 : * On output, the values of the FID column may be set with the FID of the
8373 : * created feature (if the array is not released).
8374 : * </li>
8375 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
8376 : * input FID is not preserved in the output layer. The default is NOTHING.
8377 : * Setting it to ERROR will cause the function to error out. Setting it
8378 : * to WARNING will cause the function to emit a warning but continue its
8379 : * processing.
8380 : * </li>
8381 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
8382 : * Action to perform when the input field value is not preserved in the
8383 : * output layer.
8384 : * The default is WARNING, which will cause the function to emit a warning
8385 : * but continue its processing.
8386 : * Setting it to ERROR will cause the function to error out if a lossy
8387 : * conversion is detected.
8388 : * </li>
8389 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
8390 : * GetGeometryColumn() is used. The special name
8391 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
8392 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
8393 : * Geometry columns are also identified if they have
8394 : * ARROW:extension:name=ogc.wkb as a field metadata.
8395 : * The corresponding ArrowArray must be of type binary (w) or large
8396 : * binary (W).
8397 : * </li>
8398 : * </ul>
8399 : *
8400 : * The following example demonstrates how to copy a layer from one format to
8401 : * another one (assuming it has at most a single geometry column):
8402 : \code{.py}
8403 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
8404 : stream = src_lyr.GetArrowStream()
8405 : schema = stream.GetSchema()
8406 :
8407 : # If the source layer has a FID column and the output driver supports
8408 : # a FID layer creation option, set it to the source FID column name.
8409 : if src_lyr.GetFIDColumn():
8410 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
8411 : "DS_LAYER_CREATIONOPTIONLIST"
8412 : )
8413 : if creationOptions and '"FID"' in creationOptions:
8414 : lcos["FID"] = src_lyr.GetFIDColumn()
8415 :
8416 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
8417 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
8418 : out_lyr = out_ds.CreateLayer(
8419 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
8420 : )
8421 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
8422 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
8423 : else:
8424 : out_lyr = out_ds.CreateLayer(
8425 : src_lyr.GetName(),
8426 : geom_type=src_lyr.GetGeomType(),
8427 : srs=src_lyr.GetSpatialRef(),
8428 : options=lcos,
8429 : )
8430 :
8431 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
8432 : assert success, error_msg
8433 :
8434 : src_geom_field_names = [
8435 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
8436 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
8437 : ]
8438 : for i in range(schema.GetChildrenCount()):
8439 : # GetArrowStream() may return "OGC_FID" for an unnamed source FID
8440 : # column and "wkb_geometry" for an unnamed source geometry column.
8441 : # Also test GetFIDColumn() and src_geom_field_names if they are
8442 : # named.
8443 : if (
8444 : schema.GetChild(i).GetName()
8445 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
8446 : and schema.GetChild(i).GetName() not in src_geom_field_names
8447 : ):
8448 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
8449 :
8450 : write_options = []
8451 : if src_lyr.GetFIDColumn():
8452 : write_options.append("FID=" + src_lyr.GetFIDColumn())
8453 : if (
8454 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
8455 : and src_lyr.GetGeometryColumn()
8456 : ):
8457 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
8458 :
8459 : while True:
8460 : array = stream.GetNextRecordBatch()
8461 : if array is None:
8462 : break
8463 : out_lyr.WriteArrowBatch(schema, array, write_options)
8464 : \endcode
8465 : *
8466 : * This method and CreateFeature() are mutually exclusive in the same session.
8467 : *
8468 : * This method is the same as the C++ method OGRLayer::WriteArrowBatch().
8469 : *
8470 : * @param hLayer Layer.
8471 : * @param schema Schema of array.
8472 : * @param array Array of type struct. It may be released (array->release==NULL)
8473 : * after calling this method.
8474 : * @param papszOptions Options. Null terminated list, or nullptr.
8475 : * @return true in case of success
8476 : * @since 3.8
8477 : */
8478 : // clang-format on
8479 :
8480 59 : bool OGR_L_WriteArrowBatch(OGRLayerH hLayer, const struct ArrowSchema *schema,
8481 : struct ArrowArray *array, char **papszOptions)
8482 : {
8483 59 : VALIDATE_POINTER1(hLayer, __func__, false);
8484 59 : VALIDATE_POINTER1(schema, __func__, false);
8485 59 : VALIDATE_POINTER1(array, __func__, false);
8486 :
8487 118 : return OGRLayer::FromHandle(hLayer)->WriteArrowBatch(schema, array,
8488 59 : papszOptions);
8489 : }
|