Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: OpenGIS Simple Features Reference Implementation
4 : * Purpose: Parts of OGRLayer dealing with Arrow C interface
5 : * Author: Even Rouault, <even dot rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022-2023, Even Rouault <even dot rouault at spatialys.com>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #include "ogrsf_frmts.h"
14 : #include "ogr_api.h"
15 : #include "ogr_recordbatch.h"
16 : #include "ograrrowarrayhelper.h"
17 : #include "ogrlayerarrow.h"
18 : #include "ogr_p.h"
19 : #include "ogr_swq.h"
20 : #include "ogr_wkb.h"
21 : #include "ogr_p.h"
22 : #include "ogrlayer_private.h"
23 :
24 : #include "cpl_float.h"
25 : #include "cpl_json.h"
26 : #include "cpl_time.h"
27 :
28 : #include <algorithm>
29 : #include <cassert>
30 : #include <cinttypes>
31 : #include <limits>
32 : #include <utility>
33 : #include <set>
34 : #include <string_view>
35 : #include <type_traits>
36 :
// Keys of the key/value pairs written into ArrowSchema::metadata to carry
// OGR field properties.
constexpr const char *MD_GDAL_OGR_TYPE{"GDAL:OGR:type"};
constexpr const char *MD_GDAL_OGR_ALTERNATIVE_NAME{
    "GDAL:OGR:alternative_name"};
constexpr const char *MD_GDAL_OGR_COMMENT{"GDAL:OGR:comment"};
constexpr const char *MD_GDAL_OGR_DEFAULT{"GDAL:OGR:default"};
constexpr const char *MD_GDAL_OGR_SUBTYPE{"GDAL:OGR:subtype"};
constexpr const char *MD_GDAL_OGR_WIDTH{"GDAL:OGR:width"};
constexpr const char *MD_GDAL_OGR_UNIQUE{"GDAL:OGR:unique"};
constexpr const char *MD_GDAL_OGR_DOMAIN_NAME{"GDAL:OGR:domain_name"};

// Single-letter Arrow format codes of primitive types.
constexpr char ARROW_LETTER_BOOLEAN{'b'};
constexpr char ARROW_LETTER_INT8{'c'};
constexpr char ARROW_LETTER_UINT8{'C'};
constexpr char ARROW_LETTER_INT16{'s'};
constexpr char ARROW_LETTER_UINT16{'S'};
constexpr char ARROW_LETTER_INT32{'i'};
constexpr char ARROW_LETTER_UINT32{'I'};
constexpr char ARROW_LETTER_INT64{'l'};
constexpr char ARROW_LETTER_UINT64{'L'};
constexpr char ARROW_LETTER_FLOAT16{'e'};
constexpr char ARROW_LETTER_FLOAT32{'f'};
constexpr char ARROW_LETTER_FLOAT64{'g'};
constexpr char ARROW_LETTER_STRING{'u'};
constexpr char ARROW_LETTER_LARGE_STRING{'U'};
constexpr char ARROW_LETTER_BINARY{'z'};
constexpr char ARROW_LETTER_LARGE_BINARY{'Z'};
constexpr char ARROW_LETTER_DECIMAL{'d'};
// Second letter (following '+') of the list format strings.
constexpr char ARROW_2ND_LETTER_LIST{'l'};
constexpr char ARROW_2ND_LETTER_LARGE_LIST{'L'};

constexpr int N_VALUES_PER_STRING_VIEW{4};
68 :
/** Return whether the format string is exactly "+s". */
static inline bool IsStructure(const char *format)
{
    if (format[0] != '+')
        return false;
    if (format[1] != 's')
        return false;
    return format[2] == '\0';
}
73 :
/** Return whether the format string is exactly "+m". */
static inline bool IsMap(const char *format)
{
    if (format[0] != '+')
        return false;
    if (format[1] != 'm')
        return false;
    return format[2] == '\0';
}
78 :
/** Return whether the format string starts with "w:". */
static inline bool IsFixedWidthBinary(const char *format)
{
    const bool bStartsWithW = (format[0] == 'w');
    return bStartsWithW ? format[1] == ':' : false;
}
83 :
/** Parse the integer that follows the "w:" prefix of the format string.
 *
 * The prefix itself is not checked here.
 */
static inline int GetFixedWithBinary(const char *format)
{
    const char *pszWidth = format + strlen("w:");
    return atoi(pszWidth);
}
88 :
89 30557 : static inline bool IsList(const char *format)
90 : {
91 36678 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LIST &&
92 36678 : format[2] == 0;
93 : }
94 :
95 20324 : static inline bool IsLargeList(const char *format)
96 : {
97 20448 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LARGE_LIST &&
98 20448 : format[2] == 0;
99 : }
100 :
/** Return whether the format string starts with "+w:". */
static inline bool IsFixedSizeList(const char *format)
{
    if (format[0] != '+')
        return false;
    if (format[1] != 'w')
        return false;
    return format[2] == ':';
}
105 :
/** Parse the integer that follows the "+w:" prefix of the format string.
 *
 * The prefix itself is not checked here.
 */
static inline int GetFixedSizeList(const char *format)
{
    const char *pszCount = format + strlen("+w:");
    return atoi(pszCount);
}
110 :
111 2800 : static inline bool IsDecimal(const char *format)
112 : {
113 2800 : return format[0] == ARROW_LETTER_DECIMAL && format[1] == ':';
114 : }
115 :
116 1342140 : static inline bool IsBoolean(const char *format)
117 : {
118 1342140 : return format[0] == ARROW_LETTER_BOOLEAN && format[1] == 0;
119 : }
120 :
121 1338900 : static inline bool IsInt8(const char *format)
122 : {
123 1338900 : return format[0] == ARROW_LETTER_INT8 && format[1] == 0;
124 : }
125 :
126 1339020 : static inline bool IsUInt8(const char *format)
127 : {
128 1339020 : return format[0] == ARROW_LETTER_UINT8 && format[1] == 0;
129 : }
130 :
131 1337600 : static inline bool IsInt16(const char *format)
132 : {
133 1337600 : return format[0] == ARROW_LETTER_INT16 && format[1] == 0;
134 : }
135 :
136 1337700 : static inline bool IsUInt16(const char *format)
137 : {
138 1337700 : return format[0] == ARROW_LETTER_UINT16 && format[1] == 0;
139 : }
140 :
141 1396970 : static inline bool IsInt32(const char *format)
142 : {
143 1396970 : return format[0] == ARROW_LETTER_INT32 && format[1] == 0;
144 : }
145 :
146 1336060 : static inline bool IsUInt32(const char *format)
147 : {
148 1336060 : return format[0] == ARROW_LETTER_UINT32 && format[1] == 0;
149 : }
150 :
151 1389670 : static inline bool IsInt64(const char *format)
152 : {
153 1389670 : return format[0] == ARROW_LETTER_INT64 && format[1] == 0;
154 : }
155 :
156 1268540 : static inline bool IsUInt64(const char *format)
157 : {
158 1268540 : return format[0] == ARROW_LETTER_UINT64 && format[1] == 0;
159 : }
160 :
161 15140 : static inline bool IsFloat16(const char *format)
162 : {
163 15140 : return format[0] == ARROW_LETTER_FLOAT16 && format[1] == 0;
164 : }
165 :
166 1274950 : static inline bool IsFloat32(const char *format)
167 : {
168 1274950 : return format[0] == ARROW_LETTER_FLOAT32 && format[1] == 0;
169 : }
170 :
171 1266610 : static inline bool IsFloat64(const char *format)
172 : {
173 1266610 : return format[0] == ARROW_LETTER_FLOAT64 && format[1] == 0;
174 : }
175 :
176 2485410 : static inline bool IsString(const char *format)
177 : {
178 2485410 : return format[0] == ARROW_LETTER_STRING && format[1] == 0;
179 : }
180 :
181 61894 : static inline bool IsStringView(const char *format)
182 : {
183 61894 : return format[0] == 'v' && format[1] == ARROW_LETTER_STRING &&
184 61894 : format[2] == 0;
185 : }
186 :
187 74115 : static inline bool IsLargeString(const char *format)
188 : {
189 74115 : return format[0] == ARROW_LETTER_LARGE_STRING && format[1] == 0;
190 : }
191 :
192 79416 : static inline bool IsBinary(const char *format)
193 : {
194 79416 : return format[0] == ARROW_LETTER_BINARY && format[1] == 0;
195 : }
196 :
197 13002 : static inline bool IsLargeBinary(const char *format)
198 : {
199 13002 : return format[0] == ARROW_LETTER_LARGE_BINARY && format[1] == 0;
200 : }
201 :
/** Return whether the format string starts with "ts", then chType,
 * then ':'.
 *
 * @param format Arrow format string.
 * @param chType Expected third character of the format string.
 */
static inline bool IsTimestampInternal(const char *format, char chType)
{
    // Characters are tested one at a time, left to right, so that nothing
    // past the terminating nul of a short string is read.
    if (format[0] != 't')
        return false;
    if (format[1] != 's')
        return false;
    if (format[2] != chType)
        return false;
    return format[3] == ':';
}
207 :
208 4399 : static inline bool IsTimestampSeconds(const char *format)
209 : {
210 4399 : return IsTimestampInternal(format, 's');
211 : }
212 :
213 4389 : static inline bool IsTimestampMilliseconds(const char *format)
214 : {
215 4389 : return IsTimestampInternal(format, 'm');
216 : }
217 :
218 3244 : static inline bool IsTimestampMicroseconds(const char *format)
219 : {
220 3244 : return IsTimestampInternal(format, 'u');
221 : }
222 :
223 2692 : static inline bool IsTimestampNanoseconds(const char *format)
224 : {
225 2692 : return IsTimestampInternal(format, 'n');
226 : }
227 :
228 3641 : static inline bool IsTimestamp(const char *format)
229 : {
230 9831 : return IsTimestampSeconds(format) || IsTimestampMilliseconds(format) ||
231 9831 : IsTimestampMicroseconds(format) || IsTimestampNanoseconds(format);
232 : }
233 :
234 107 : static inline const char *GetTimestampTimezone(const char *format)
235 : {
236 107 : return IsTimestamp(format) ? format + strlen("tm?:") : "";
237 : }
238 :
239 : /************************************************************************/
240 : /* TestBit() */
241 : /************************************************************************/
242 :
/** Return whether bit nIdx of the bitmap is set.
 *
 * Bit 0 is the least significant bit of pabyData[0].
 */
inline bool TestBit(const uint8_t *pabyData, size_t nIdx)
{
    const size_t nByteIdx = nIdx >> 3;
    const unsigned nBitInByte = static_cast<unsigned>(nIdx & 7);
    return ((pabyData[nByteIdx] >> nBitInByte) & 1) != 0;
}
247 :
248 : /************************************************************************/
249 : /* SetBit() */
250 : /************************************************************************/
251 :
/** Set bit nIdx of the bitmap to 1.
 *
 * Bit 0 is the least significant bit of pabyData[0].
 */
inline void SetBit(uint8_t *pabyData, size_t nIdx)
{
    const size_t nByteIdx = nIdx >> 3;
    const unsigned nMask = 1u << (nIdx & 7);
    pabyData[nByteIdx] = static_cast<uint8_t>(pabyData[nByteIdx] | nMask);
}
256 :
257 : /************************************************************************/
258 : /* UnsetBit() */
259 : /************************************************************************/
260 :
/** Clear bit nIdx of the bitmap.
 *
 * Bit 0 is the least significant bit of pabyData[0].
 */
inline void UnsetBit(uint8_t *pabyData, size_t nIdx)
{
    const size_t nByteIdx = nIdx >> 3;
    const unsigned nMask = 1u << (nIdx & 7);
    pabyData[nByteIdx] = static_cast<uint8_t>(pabyData[nByteIdx] & ~nMask);
}
265 :
266 : /************************************************************************/
267 : /* DefaultReleaseSchema() */
268 : /************************************************************************/
269 :
270 25484 : static void OGRLayerReleaseSchema(struct ArrowSchema *schema,
271 : bool bFullFreeFormat)
272 : {
273 25484 : CPLAssert(schema->release != nullptr);
274 25484 : if (bFullFreeFormat || STARTS_WITH(schema->format, "w:") ||
275 25452 : STARTS_WITH(schema->format, "tsm:"))
276 : {
277 1039 : CPLFree(const_cast<char *>(schema->format));
278 : }
279 25484 : CPLFree(const_cast<char *>(schema->name));
280 25484 : CPLFree(const_cast<char *>(schema->metadata));
281 25484 : if (schema->children)
282 : {
283 26138 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
284 : {
285 22869 : if (schema->children[i] && schema->children[i]->release)
286 : {
287 22869 : schema->children[i]->release(schema->children[i]);
288 22869 : CPLFree(schema->children[i]);
289 : }
290 : }
291 3269 : CPLFree(schema->children);
292 : }
293 25484 : if (schema->dictionary)
294 : {
295 32 : if (schema->dictionary->release)
296 : {
297 32 : schema->dictionary->release(schema->dictionary);
298 32 : CPLFree(schema->dictionary);
299 : }
300 : }
301 25484 : schema->release = nullptr;
302 25484 : }
303 :
304 25461 : static void OGRLayerPartialReleaseSchema(struct ArrowSchema *schema)
305 : {
306 25461 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ false);
307 25461 : }
308 :
309 23 : static void OGRLayerFullReleaseSchema(struct ArrowSchema *schema)
310 : {
311 23 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ true);
312 23 : }
313 :
314 : /** Release a ArrowSchema.
315 : *
316 : * To be used by driver implementations that have a custom GetArrowStream()
317 : * implementation.
318 : *
319 : * @param schema Schema to release.
320 : * @since GDAL 3.6
321 : */
322 :
323 25429 : void OGRLayer::ReleaseSchema(struct ArrowSchema *schema)
324 : {
325 25429 : OGRLayerPartialReleaseSchema(schema);
326 25429 : }
327 :
328 : /************************************************************************/
329 : /* AddDictToSchema() */
330 : /************************************************************************/
331 :
332 32 : static void AddDictToSchema(struct ArrowSchema *psChild,
333 : const OGRCodedFieldDomain *poCodedDomain)
334 : {
335 32 : const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
336 32 : int nLastCode = -1;
337 32 : int nCountNull = 0;
338 32 : uint32_t nCountChars = 0;
339 112 : for (; psIter->pszCode; ++psIter)
340 : {
341 80 : if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
342 : {
343 0 : return;
344 : }
345 80 : int nCode = atoi(psIter->pszCode);
346 80 : if (nCode <= nLastCode || nCode - nLastCode > 100)
347 : {
348 0 : return;
349 : }
350 106 : for (int i = nLastCode + 1; i < nCode; ++i)
351 : {
352 26 : nCountNull++;
353 : }
354 80 : if (psIter->pszValue != nullptr)
355 : {
356 54 : const size_t nLen = strlen(psIter->pszValue);
357 54 : if (nLen > std::numeric_limits<uint32_t>::max() - nCountChars)
358 0 : return;
359 54 : nCountChars += static_cast<uint32_t>(nLen);
360 : }
361 : else
362 26 : nCountNull++;
363 80 : nLastCode = nCode;
364 : }
365 :
366 : auto psChildDict = static_cast<struct ArrowSchema *>(
367 32 : CPLCalloc(1, sizeof(struct ArrowSchema)));
368 32 : psChild->dictionary = psChildDict;
369 32 : psChildDict->release = OGRLayerPartialReleaseSchema;
370 32 : psChildDict->name = CPLStrdup(poCodedDomain->GetName().c_str());
371 32 : psChildDict->format = "u";
372 32 : if (nCountNull)
373 26 : psChildDict->flags = ARROW_FLAG_NULLABLE;
374 : }
375 :
376 : /************************************************************************/
377 : /* DefaultGetArrowSchema() */
378 : /************************************************************************/
379 :
380 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
381 : *
382 : * To be used by driver implementations that have a custom GetArrowStream()
383 : * implementation.
384 : *
385 : * @since GDAL 3.6
386 : */
387 2249 : int OGRLayer::GetArrowSchema(struct ArrowArrayStream *,
388 : struct ArrowSchema *out_schema)
389 : {
390 2249 : const bool bIncludeFID = CPLTestBool(
391 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
392 2249 : const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
393 : GAS_OPT_DATETIME_AS_STRING, false);
394 2249 : memset(out_schema, 0, sizeof(*out_schema));
395 2249 : out_schema->format = "+s";
396 2249 : out_schema->name = CPLStrdup("");
397 2249 : out_schema->metadata = nullptr;
398 2249 : auto poLayerDefn = GetLayerDefn();
399 2249 : const int nFieldCount = poLayerDefn->GetFieldCount();
400 2249 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
401 2249 : const int nChildren = 1 + nFieldCount + nGeomFieldCount;
402 :
403 2249 : out_schema->children = static_cast<struct ArrowSchema **>(
404 2249 : CPLCalloc(nChildren, sizeof(struct ArrowSchema *)));
405 2249 : int iSchemaChild = 0;
406 2249 : if (bIncludeFID)
407 : {
408 3970 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
409 1985 : CPLCalloc(1, sizeof(struct ArrowSchema)));
410 1985 : auto psChild = out_schema->children[iSchemaChild];
411 1985 : ++iSchemaChild;
412 1985 : psChild->release = OGRLayer::ReleaseSchema;
413 1985 : const char *pszFIDName = GetFIDColumn();
414 1985 : psChild->name =
415 1985 : CPLStrdup((pszFIDName && pszFIDName[0]) ? pszFIDName
416 : : DEFAULT_ARROW_FID_NAME);
417 1985 : psChild->format = "l";
418 : }
419 20195 : for (int i = 0; i < nFieldCount; ++i)
420 : {
421 17946 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
422 17946 : if (poFieldDefn->IsIgnored())
423 : {
424 48 : continue;
425 : }
426 :
427 35796 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
428 17898 : CPLCalloc(1, sizeof(struct ArrowSchema)));
429 17898 : auto psChild = out_schema->children[iSchemaChild];
430 17898 : ++iSchemaChild;
431 17898 : psChild->release = OGRLayer::ReleaseSchema;
432 17898 : psChild->name = CPLStrdup(poFieldDefn->GetNameRef());
433 17898 : if (poFieldDefn->IsNullable())
434 17112 : psChild->flags = ARROW_FLAG_NULLABLE;
435 17898 : const auto eType = poFieldDefn->GetType();
436 17898 : const auto eSubType = poFieldDefn->GetSubType();
437 17898 : const char *item_format = nullptr;
438 :
439 35796 : std::vector<std::pair<std::string, std::string>> oMetadata;
440 :
441 17898 : switch (eType)
442 : {
443 5745 : case OFTInteger:
444 : {
445 5745 : if (eSubType == OFSTBoolean)
446 286 : psChild->format = "b";
447 5459 : else if (eSubType == OFSTInt16)
448 673 : psChild->format = "s";
449 : else
450 4786 : psChild->format = "i";
451 :
452 5745 : const auto &osDomainName = poFieldDefn->GetDomainName();
453 5745 : if (!osDomainName.empty())
454 : {
455 32 : auto poDS = GetDataset();
456 32 : if (poDS)
457 : {
458 : const auto poFieldDomain =
459 32 : poDS->GetFieldDomain(osDomainName);
460 64 : if (poFieldDomain &&
461 32 : poFieldDomain->GetDomainType() == OFDT_CODED)
462 : {
463 32 : const OGRCodedFieldDomain *poCodedDomain =
464 : static_cast<const OGRCodedFieldDomain *>(
465 : poFieldDomain);
466 32 : AddDictToSchema(psChild, poCodedDomain);
467 : }
468 : }
469 : }
470 :
471 5745 : break;
472 : }
473 :
474 565 : case OFTInteger64:
475 565 : psChild->format = "l";
476 565 : break;
477 :
478 2886 : case OFTReal:
479 : {
480 2886 : if (eSubType == OFSTFloat32)
481 676 : psChild->format = "f";
482 : else
483 2210 : psChild->format = "g";
484 2886 : break;
485 : }
486 :
487 5130 : case OFTString:
488 : case OFTWideString:
489 5130 : psChild->format = "u";
490 5130 : break;
491 :
492 1209 : case OFTBinary:
493 : {
494 1209 : if (poFieldDefn->GetWidth() > 0)
495 9 : psChild->format =
496 9 : CPLStrdup(CPLSPrintf("w:%d", poFieldDefn->GetWidth()));
497 : else
498 1200 : psChild->format = "z";
499 1209 : break;
500 : }
501 :
502 383 : case OFTIntegerList:
503 : {
504 383 : if (eSubType == OFSTBoolean)
505 92 : item_format = "b";
506 291 : else if (eSubType == OFSTInt16)
507 67 : item_format = "s";
508 : else
509 224 : item_format = "i";
510 383 : break;
511 : }
512 :
513 97 : case OFTInteger64List:
514 97 : item_format = "l";
515 97 : break;
516 :
517 256 : case OFTRealList:
518 : {
519 256 : if (eSubType == OFSTFloat32)
520 84 : item_format = "f";
521 : else
522 172 : item_format = "g";
523 256 : break;
524 : }
525 :
526 273 : case OFTStringList:
527 : case OFTWideStringList:
528 273 : item_format = "u";
529 273 : break;
530 :
531 210 : case OFTDate:
532 210 : psChild->format = "tdD";
533 210 : break;
534 :
535 118 : case OFTTime:
536 118 : psChild->format = "ttm";
537 118 : break;
538 :
539 1026 : case OFTDateTime:
540 : {
541 1026 : const char *pszPrefix = "tsm:";
542 : const char *pszTZOverride =
543 1026 : m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
544 1026 : const int nTZFlag = poFieldDefn->GetTZFlag();
545 1026 : if (bDateTimeAsString)
546 : {
547 19 : psChild->format = "u";
548 : }
549 1007 : else if (pszTZOverride && EQUAL(pszTZOverride, "unknown"))
550 : {
551 4 : psChild->format = CPLStrdup(pszPrefix);
552 : }
553 1003 : else if ((pszTZOverride && EQUAL(pszTZOverride, "mixed")) ||
554 963 : (!pszTZOverride && nTZFlag == OGR_TZFLAG_MIXED_TZ))
555 : {
556 : oMetadata.emplace_back(
557 6 : std::pair(ARROW_EXTENSION_NAME_KEY,
558 6 : EXTENSION_NAME_ARROW_TIMESTAMP_WITH_OFFSET));
559 :
560 6 : psChild->format = "+s";
561 6 : psChild->n_children = 2;
562 6 : psChild->children = static_cast<struct ArrowSchema **>(
563 6 : CPLCalloc(2, sizeof(struct ArrowSchema *)));
564 :
565 : // Create sub-child for timestamp in UTC
566 12 : psChild->children[0] = static_cast<struct ArrowSchema *>(
567 6 : CPLCalloc(1, sizeof(struct ArrowSchema)));
568 6 : psChild->children[0]->release = OGRLayer::ReleaseSchema;
569 12 : psChild->children[0]->name =
570 6 : CPLStrdup(ATSWO_TIMESTAMP_FIELD_NAME);
571 6 : psChild->children[0]->format = CPLStrdup("tsm:UTC");
572 :
573 : // Create sub-child for offset to UTC in minutes
574 12 : psChild->children[1] = static_cast<struct ArrowSchema *>(
575 6 : CPLCalloc(1, sizeof(struct ArrowSchema)));
576 6 : psChild->children[1]->release = OGRLayer::ReleaseSchema;
577 12 : psChild->children[1]->name =
578 6 : CPLStrdup(ATSWO_OFFSET_MINUTES_FIELD_NAME);
579 6 : psChild->children[1]->format = "s";
580 : }
581 997 : else if (pszTZOverride)
582 : {
583 40 : psChild->format = CPLStrdup(
584 80 : (std::string(pszPrefix) + pszTZOverride).c_str());
585 : }
586 : else
587 : {
588 957 : if (nTZFlag == OGR_TZFLAG_UTC)
589 : {
590 5 : psChild->format =
591 5 : CPLStrdup(CPLSPrintf("%sUTC", pszPrefix));
592 : }
593 952 : else if (nTZFlag == OGR_TZFLAG_UNKNOWN ||
594 : nTZFlag == OGR_TZFLAG_LOCALTIME)
595 : {
596 936 : psChild->format = CPLStrdup(pszPrefix);
597 : }
598 : else
599 : {
600 16 : psChild->format = CPLStrdup(
601 32 : (pszPrefix + OGRTZFlagToTimezone(nTZFlag, "UTC"))
602 : .c_str());
603 : }
604 : }
605 1026 : break;
606 : }
607 : }
608 :
609 17898 : if (item_format)
610 : {
611 1009 : psChild->format = "+l";
612 1009 : psChild->n_children = 1;
613 1009 : psChild->children = static_cast<struct ArrowSchema **>(
614 1009 : CPLCalloc(1, sizeof(struct ArrowSchema *)));
615 2018 : psChild->children[0] = static_cast<struct ArrowSchema *>(
616 1009 : CPLCalloc(1, sizeof(struct ArrowSchema)));
617 1009 : psChild->children[0]->release = OGRLayer::ReleaseSchema;
618 1009 : psChild->children[0]->name = CPLStrdup("item");
619 1009 : psChild->children[0]->format = item_format;
620 : }
621 :
622 17898 : if (eType == OFTDateTime && bDateTimeAsString)
623 : {
624 : oMetadata.emplace_back(
625 19 : std::pair(MD_GDAL_OGR_TYPE, OGR_GetFieldTypeName(eType)));
626 : }
627 :
628 17898 : const char *pszAlternativeName = poFieldDefn->GetAlternativeNameRef();
629 17898 : if (pszAlternativeName && pszAlternativeName[0])
630 : oMetadata.emplace_back(
631 262 : std::pair(MD_GDAL_OGR_ALTERNATIVE_NAME, pszAlternativeName));
632 :
633 17898 : const char *pszDefault = poFieldDefn->GetDefault();
634 17898 : if (pszDefault && pszDefault[0])
635 42 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DEFAULT, pszDefault));
636 :
637 17898 : const std::string &osComment = poFieldDefn->GetComment();
638 17898 : if (!osComment.empty())
639 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_COMMENT, osComment));
640 :
641 17898 : if (eType == OFTString && eSubType == OFSTJSON)
642 : {
643 130 : oMetadata.emplace_back(
644 130 : std::pair(ARROW_EXTENSION_NAME_KEY, EXTENSION_NAME_ARROW_JSON));
645 : }
646 17768 : else if (eSubType != OFSTNone && eSubType != OFSTBoolean &&
647 : eSubType != OFSTFloat32)
648 : {
649 0 : oMetadata.emplace_back(std::pair(
650 741 : MD_GDAL_OGR_SUBTYPE, OGR_GetFieldSubTypeName(eSubType)));
651 : }
652 17898 : if (eType == OFTString && poFieldDefn->GetWidth() > 0)
653 : {
654 0 : oMetadata.emplace_back(std::pair(
655 641 : MD_GDAL_OGR_WIDTH, CPLSPrintf("%d", poFieldDefn->GetWidth())));
656 : }
657 17898 : if (poFieldDefn->IsUnique())
658 : {
659 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_UNIQUE, "true"));
660 : }
661 17898 : if (!poFieldDefn->GetDomainName().empty())
662 : {
663 64 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DOMAIN_NAME,
664 64 : poFieldDefn->GetDomainName()));
665 : }
666 :
667 17898 : if (!oMetadata.empty())
668 : {
669 1863 : uint64_t nLen64 = sizeof(int32_t);
670 3756 : for (const auto &oPair : oMetadata)
671 : {
672 1893 : nLen64 += sizeof(int32_t);
673 1893 : nLen64 += oPair.first.size();
674 1893 : nLen64 += sizeof(int32_t);
675 1893 : nLen64 += oPair.second.size();
676 : }
677 1863 : if (nLen64 <
678 1863 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
679 : {
680 1863 : const size_t nLen = static_cast<size_t>(nLen64);
681 1863 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
682 1863 : psChild->metadata = pszMetadata;
683 1863 : size_t offsetMD = 0;
684 1863 : int32_t nSize = static_cast<int>(oMetadata.size());
685 1863 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
686 1863 : offsetMD += sizeof(int32_t);
687 3756 : for (const auto &oPair : oMetadata)
688 : {
689 1893 : nSize = static_cast<int32_t>(oPair.first.size());
690 1893 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
691 1893 : offsetMD += sizeof(int32_t);
692 1893 : memcpy(pszMetadata + offsetMD, oPair.first.data(),
693 : oPair.first.size());
694 1893 : offsetMD += oPair.first.size();
695 :
696 1893 : nSize = static_cast<int32_t>(oPair.second.size());
697 1893 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
698 1893 : offsetMD += sizeof(int32_t);
699 1893 : memcpy(pszMetadata + offsetMD, oPair.second.data(),
700 : oPair.second.size());
701 1893 : offsetMD += oPair.second.size();
702 : }
703 :
704 1863 : CPLAssert(offsetMD == nLen);
705 1863 : CPL_IGNORE_RET_VAL(offsetMD);
706 : }
707 : else
708 : {
709 : // Extremely unlikely !
710 0 : CPLError(CE_Warning, CPLE_AppDefined,
711 : "Cannot write ArrowSchema::metadata due to "
712 : "too large content");
713 : }
714 : }
715 : }
716 :
717 : const char *const pszGeometryMetadataEncoding =
718 2249 : m_aosArrowArrayStreamOptions.FetchNameValue(
719 : "GEOMETRY_METADATA_ENCODING");
720 2249 : const char *pszExtensionName = EXTENSION_NAME_OGC_WKB;
721 2249 : if (pszGeometryMetadataEncoding)
722 : {
723 6 : if (EQUAL(pszGeometryMetadataEncoding, "OGC"))
724 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
725 6 : else if (EQUAL(pszGeometryMetadataEncoding, "GEOARROW"))
726 6 : pszExtensionName = EXTENSION_NAME_GEOARROW_WKB;
727 : else
728 0 : CPLError(CE_Warning, CPLE_NotSupported,
729 : "Unsupported GEOMETRY_METADATA_ENCODING value: %s",
730 : pszGeometryMetadataEncoding);
731 : }
732 4211 : for (int i = 0; i < nGeomFieldCount; ++i)
733 : {
734 1962 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
735 1962 : if (poFieldDefn->IsIgnored())
736 : {
737 15 : continue;
738 : }
739 :
740 1947 : out_schema->children[iSchemaChild] = CreateSchemaForWKBGeometryColumn(
741 : poFieldDefn, "z", pszExtensionName);
742 :
743 1947 : ++iSchemaChild;
744 : }
745 :
746 2249 : out_schema->n_children = iSchemaChild;
747 2249 : out_schema->release = OGRLayer::ReleaseSchema;
748 2249 : return 0;
749 : }
750 :
751 : /************************************************************************/
752 : /* CreateSchemaForWKBGeometryColumn() */
753 : /************************************************************************/
754 :
755 : /** Return a ArrowSchema* corresponding to the WKB encoding of a geometry
756 : * column.
757 : */
758 :
759 : /* static */
760 : struct ArrowSchema *
761 2276 : OGRLayer::CreateSchemaForWKBGeometryColumn(const OGRGeomFieldDefn *poFieldDefn,
762 : const char *pszArrowFormat,
763 : const char *pszExtensionName)
764 : {
765 2276 : CPLAssert(strcmp(pszArrowFormat, "z") == 0 ||
766 : strcmp(pszArrowFormat, "Z") == 0);
767 2276 : if (!EQUAL(pszExtensionName, EXTENSION_NAME_OGC_WKB) &&
768 6 : !EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
769 : {
770 0 : CPLError(CE_Failure, CPLE_NotSupported,
771 : "Unsupported extension name '%s'. Defaulting to '%s'",
772 : pszExtensionName, EXTENSION_NAME_OGC_WKB);
773 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
774 : }
775 : auto psSchema = static_cast<struct ArrowSchema *>(
776 2276 : CPLCalloc(1, sizeof(struct ArrowSchema)));
777 2276 : psSchema->release = OGRLayer::ReleaseSchema;
778 2276 : const char *pszGeomFieldName = poFieldDefn->GetNameRef();
779 2276 : if (pszGeomFieldName[0] == '\0')
780 796 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
781 2276 : psSchema->name = CPLStrdup(pszGeomFieldName);
782 2276 : if (poFieldDefn->IsNullable())
783 2247 : psSchema->flags = ARROW_FLAG_NULLABLE;
784 2276 : psSchema->format = strcmp(pszArrowFormat, "z") == 0 ? "z" : "Z";
785 2276 : std::string osExtensionMetadata;
786 2276 : if (EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
787 : {
788 6 : const auto poSRS = poFieldDefn->GetSpatialRef();
789 6 : if (poSRS)
790 : {
791 3 : char *pszPROJJSON = nullptr;
792 3 : poSRS->exportToPROJJSON(&pszPROJJSON, nullptr);
793 3 : if (pszPROJJSON)
794 : {
795 3 : osExtensionMetadata = "{\"crs\":";
796 3 : osExtensionMetadata += pszPROJJSON;
797 3 : osExtensionMetadata += '}';
798 3 : CPLFree(pszPROJJSON);
799 : }
800 : else
801 : {
802 0 : CPLError(CE_Warning, CPLE_AppDefined,
803 : "Cannot export CRS of geometry field %s to PROJJSON",
804 : poFieldDefn->GetNameRef());
805 : }
806 : }
807 : }
808 2276 : size_t nLen = sizeof(int32_t) + sizeof(int32_t) +
809 : strlen(ARROW_EXTENSION_NAME_KEY) + sizeof(int32_t) +
810 2276 : strlen(pszExtensionName);
811 2276 : if (!osExtensionMetadata.empty())
812 : {
813 3 : nLen += sizeof(int32_t) + strlen(ARROW_EXTENSION_METADATA_KEY) +
814 3 : sizeof(int32_t) + osExtensionMetadata.size();
815 : }
816 2276 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
817 2276 : psSchema->metadata = pszMetadata;
818 2276 : size_t offsetMD = 0;
819 2276 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
820 2276 : osExtensionMetadata.empty() ? 1 : 2;
821 2276 : offsetMD += sizeof(int32_t);
822 2276 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
823 : static_cast<int32_t>(strlen(ARROW_EXTENSION_NAME_KEY));
824 2276 : offsetMD += sizeof(int32_t);
825 2276 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_NAME_KEY,
826 : strlen(ARROW_EXTENSION_NAME_KEY));
827 2276 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_NAME_KEY));
828 2276 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
829 2276 : static_cast<int32_t>(strlen(pszExtensionName));
830 2276 : offsetMD += sizeof(int32_t);
831 2276 : memcpy(pszMetadata + offsetMD, pszExtensionName, strlen(pszExtensionName));
832 2276 : offsetMD += strlen(pszExtensionName);
833 2276 : if (!osExtensionMetadata.empty())
834 : {
835 3 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
836 : static_cast<int32_t>(strlen(ARROW_EXTENSION_METADATA_KEY));
837 3 : offsetMD += sizeof(int32_t);
838 3 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_METADATA_KEY,
839 : strlen(ARROW_EXTENSION_METADATA_KEY));
840 3 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_METADATA_KEY));
841 3 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
842 3 : static_cast<int32_t>(osExtensionMetadata.size());
843 3 : offsetMD += sizeof(int32_t);
844 3 : memcpy(pszMetadata + offsetMD, osExtensionMetadata.c_str(),
845 : osExtensionMetadata.size());
846 3 : offsetMD += osExtensionMetadata.size();
847 : }
848 2276 : CPLAssert(offsetMD == nLen);
849 2276 : CPL_IGNORE_RET_VAL(offsetMD);
850 4552 : return psSchema;
851 : }
852 :
853 : /************************************************************************/
854 : /* StaticGetArrowSchema() */
855 : /************************************************************************/
856 :
857 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
858 : *
859 : * To be used by driver implementations that have a custom GetArrowStream()
860 : * implementation.
861 : *
862 : * @since GDAL 3.6
863 : */
864 2470 : int OGRLayer::StaticGetArrowSchema(struct ArrowArrayStream *stream,
865 : struct ArrowSchema *out_schema)
866 : {
867 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
868 2470 : stream->private_data)
869 2470 : ->poShared->m_poLayer;
870 2470 : if (poLayer == nullptr)
871 : {
872 1 : CPLError(CE_Failure, CPLE_NotSupported,
873 : "Calling get_schema() on a freed OGRLayer is not supported");
874 1 : return EINVAL;
875 : }
876 2469 : return poLayer->GetArrowSchema(stream, out_schema);
877 : }
878 :
879 : /************************************************************************/
880 : /* DefaultReleaseArray() */
881 : /************************************************************************/
882 :
883 35059 : static void OGRLayerDefaultReleaseArray(struct ArrowArray *array)
884 : {
885 35059 : if (array->buffers)
886 : {
887 110162 : for (int i = 0; i < static_cast<int>(array->n_buffers); ++i)
888 75103 : VSIFreeAligned(const_cast<void *>(array->buffers[i]));
889 35059 : CPLFree(array->buffers);
890 : }
891 35059 : if (array->children)
892 : {
893 41002 : for (int i = 0; i < static_cast<int>(array->n_children); ++i)
894 : {
895 33237 : if (array->children[i] && array->children[i]->release)
896 : {
897 32864 : array->children[i]->release(array->children[i]);
898 32864 : CPLFree(array->children[i]);
899 : }
900 : }
901 7765 : CPLFree(array->children);
902 : }
903 35059 : if (array->dictionary)
904 : {
905 148 : if (array->dictionary->release)
906 : {
907 148 : array->dictionary->release(array->dictionary);
908 148 : CPLFree(array->dictionary);
909 : }
910 : }
911 35059 : array->release = nullptr;
912 35059 : }
913 :
914 : /** Release a ArrowArray.
915 : *
916 : * To be used by driver implementations that have a custom GetArrowStream()
917 : * implementation.
918 : *
919 : * @param array Arrow array to release.
920 : * @since GDAL 3.6
921 : */
922 3987 : void OGRLayer::ReleaseArray(struct ArrowArray *array)
923 : {
924 3987 : OGRLayerDefaultReleaseArray(array);
925 3987 : }
926 :
927 : /************************************************************************/
928 : /* IsValidField() */
929 : /************************************************************************/
930 :
931 88546 : static inline bool IsValidField(const OGRField *psRawField)
932 : {
933 103686 : return (!(psRawField->Set.nMarker1 == OGRUnsetMarker &&
934 7570 : psRawField->Set.nMarker2 == OGRUnsetMarker &&
935 177092 : psRawField->Set.nMarker3 == OGRUnsetMarker) &&
936 80976 : !(psRawField->Set.nMarker1 == OGRNullMarker &&
937 3213 : psRawField->Set.nMarker2 == OGRNullMarker &&
938 91759 : psRawField->Set.nMarker3 == OGRNullMarker));
939 : }
940 :
941 : /************************************************************************/
942 : /* AllocValidityBitmap() */
943 : /************************************************************************/
944 :
945 3516 : static uint8_t *AllocValidityBitmap(size_t nSize)
946 : {
947 : auto pabyValidity = static_cast<uint8_t *>(
948 3516 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((1 + nSize + 7) / 8));
949 3516 : if (pabyValidity)
950 : {
951 : // All valid initially
952 3516 : memset(pabyValidity, 0xFF, (nSize + 7) / 8);
953 : }
954 3516 : return pabyValidity;
955 : }
956 :
957 : /************************************************************************/
958 : /* FillArray() */
959 : /************************************************************************/
960 :
961 : template <class T, typename TMember>
962 5821 : static bool FillArray(struct ArrowArray *psChild,
963 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
964 : const size_t nFeatureCountLimit, const bool bIsNullable,
965 : TMember member, const int i)
966 : {
967 5821 : psChild->n_buffers = 2;
968 5821 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
969 5821 : uint8_t *pabyValidity = nullptr;
970 : T *panValues = static_cast<T *>(
971 5821 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
972 5821 : if (panValues == nullptr)
973 0 : return false;
974 5821 : psChild->buffers[1] = panValues;
975 53751 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
976 : {
977 47930 : auto &poFeature = apoFeatures[iFeat];
978 47930 : const auto psRawField = poFeature->GetRawFieldRef(i);
979 47930 : if (IsValidField(psRawField))
980 : {
981 43012 : panValues[iFeat] = static_cast<T>((*psRawField).*member);
982 : }
983 4918 : else if (bIsNullable)
984 : {
985 4918 : panValues[iFeat] = 0;
986 4918 : ++psChild->null_count;
987 4918 : if (pabyValidity == nullptr)
988 : {
989 1235 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
990 1235 : psChild->buffers[0] = pabyValidity;
991 1235 : if (pabyValidity == nullptr)
992 0 : return false;
993 : }
994 4918 : UnsetBit(pabyValidity, iFeat);
995 : }
996 : else
997 : {
998 0 : panValues[iFeat] = 0;
999 : }
1000 : }
1001 5821 : return true;
1002 : }
1003 :
1004 : /************************************************************************/
1005 : /* FillBoolArray() */
1006 : /************************************************************************/
1007 :
1008 : template <typename TMember>
1009 138 : static bool FillBoolArray(struct ArrowArray *psChild,
1010 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1011 : const size_t nFeatureCountLimit,
1012 : const bool bIsNullable, TMember member, const int i)
1013 : {
1014 138 : psChild->n_buffers = 2;
1015 138 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1016 138 : uint8_t *pabyValidity = nullptr;
1017 : uint8_t *panValues = static_cast<uint8_t *>(
1018 138 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 7 + 1) / 8));
1019 138 : if (panValues == nullptr)
1020 0 : return false;
1021 138 : memset(panValues, 0, (nFeatureCountLimit + 7) / 8);
1022 138 : psChild->buffers[1] = panValues;
1023 601 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1024 : {
1025 463 : auto &poFeature = apoFeatures[iFeat];
1026 463 : const auto psRawField = poFeature->GetRawFieldRef(i);
1027 463 : if (IsValidField(psRawField))
1028 : {
1029 405 : if ((*psRawField).*member)
1030 81 : SetBit(panValues, iFeat);
1031 : }
1032 58 : else if (bIsNullable)
1033 : {
1034 58 : ++psChild->null_count;
1035 58 : if (pabyValidity == nullptr)
1036 : {
1037 46 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1038 46 : psChild->buffers[0] = pabyValidity;
1039 46 : if (pabyValidity == nullptr)
1040 0 : return false;
1041 : }
1042 58 : UnsetBit(pabyValidity, iFeat);
1043 : }
1044 : }
1045 138 : return true;
1046 : }
1047 :
1048 : /************************************************************************/
1049 : /* FillListArray() */
1050 : /************************************************************************/
1051 :
1052 : struct GetFromIntegerList
1053 : {
1054 555 : static inline int getCount(const OGRField *psRawField)
1055 : {
1056 555 : return psRawField->IntegerList.nCount;
1057 : }
1058 :
1059 276 : static inline const int *getValues(const OGRField *psRawField)
1060 : {
1061 276 : return psRawField->IntegerList.paList;
1062 : }
1063 : };
1064 :
1065 : struct GetFromInteger64List
1066 : {
1067 242 : static inline int getCount(const OGRField *psRawField)
1068 : {
1069 242 : return psRawField->Integer64List.nCount;
1070 : }
1071 :
1072 120 : static inline const GIntBig *getValues(const OGRField *psRawField)
1073 : {
1074 120 : return psRawField->Integer64List.paList;
1075 : }
1076 : };
1077 :
1078 : struct GetFromRealList
1079 : {
1080 374 : static inline int getCount(const OGRField *psRawField)
1081 : {
1082 374 : return psRawField->RealList.nCount;
1083 : }
1084 :
1085 186 : static inline const double *getValues(const OGRField *psRawField)
1086 : {
1087 186 : return psRawField->RealList.paList;
1088 : }
1089 : };
1090 :
1091 : template <class OffsetType, class T, class GetFromList>
1092 : static size_t
1093 416 : FillListArray(struct ArrowArray *psChild,
1094 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1095 : const size_t nFeatureCountLimit, const bool bIsNullable,
1096 : const int i, const size_t nMemLimit)
1097 : {
1098 416 : psChild->n_buffers = 2;
1099 416 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1100 416 : uint8_t *pabyValidity = nullptr;
1101 : OffsetType *panOffsets =
1102 416 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1103 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1104 416 : if (panOffsets == nullptr)
1105 0 : return 0;
1106 416 : psChild->buffers[1] = panOffsets;
1107 :
1108 416 : OffsetType nOffset = 0;
1109 416 : size_t nFeatCount = 0;
1110 1445 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1111 : {
1112 1035 : panOffsets[iFeat] = nOffset;
1113 1035 : auto &poFeature = apoFeatures[iFeat];
1114 1035 : const auto psRawField = poFeature->GetRawFieldRef(i);
1115 1035 : if (IsValidField(psRawField))
1116 : {
1117 529 : const unsigned nCount = GetFromList::getCount(psRawField);
1118 529 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1119 : {
1120 6 : if (nFeatCount == 0)
1121 3 : return 0;
1122 3 : break;
1123 : }
1124 523 : nOffset += static_cast<OffsetType>(nCount);
1125 : }
1126 506 : else if (bIsNullable)
1127 : {
1128 506 : ++psChild->null_count;
1129 506 : if (pabyValidity == nullptr)
1130 : {
1131 231 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1132 231 : psChild->buffers[0] = pabyValidity;
1133 231 : if (pabyValidity == nullptr)
1134 0 : return 0;
1135 : }
1136 506 : UnsetBit(pabyValidity, iFeat);
1137 : }
1138 : }
1139 413 : panOffsets[nFeatCount] = nOffset;
1140 :
1141 413 : psChild->n_children = 1;
1142 413 : psChild->children = static_cast<struct ArrowArray **>(
1143 413 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1144 826 : psChild->children[0] = static_cast<struct ArrowArray *>(
1145 413 : CPLCalloc(1, sizeof(struct ArrowArray)));
1146 413 : auto psValueChild = psChild->children[0];
1147 :
1148 413 : psValueChild->release = OGRLayerDefaultReleaseArray;
1149 413 : psValueChild->n_buffers = 2;
1150 413 : psValueChild->buffers =
1151 413 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1152 413 : psValueChild->length = nOffset;
1153 : T *panValues = static_cast<T *>(
1154 413 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (nOffset + 1)));
1155 413 : if (panValues == nullptr)
1156 0 : return 0;
1157 413 : psValueChild->buffers[1] = panValues;
1158 :
1159 413 : nOffset = 0;
1160 1442 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1161 : {
1162 1029 : auto &poFeature = apoFeatures[iFeat];
1163 1029 : const auto psRawField = poFeature->GetRawFieldRef(i);
1164 1029 : if (IsValidField(psRawField))
1165 : {
1166 523 : const int nCount = GetFromList::getCount(psRawField);
1167 523 : const auto paList = GetFromList::getValues(psRawField);
1168 : if (sizeof(*paList) == sizeof(T))
1169 456 : memcpy(panValues + nOffset, paList, nCount * sizeof(T));
1170 : else
1171 : {
1172 203 : for (int j = 0; j < nCount; ++j)
1173 : {
1174 136 : panValues[nOffset + j] = static_cast<T>(paList[j]);
1175 : }
1176 : }
1177 523 : nOffset += static_cast<OffsetType>(nCount);
1178 : }
1179 : }
1180 :
1181 413 : return nFeatCount;
1182 : }
1183 :
1184 : template <class OffsetType, class GetFromList>
1185 : static size_t
1186 49 : FillListArrayBool(struct ArrowArray *psChild,
1187 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1188 : const size_t nFeatureCountLimit, const bool bIsNullable,
1189 : const int i, const size_t nMemLimit)
1190 : {
1191 49 : psChild->n_buffers = 2;
1192 49 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1193 49 : uint8_t *pabyValidity = nullptr;
1194 : OffsetType *panOffsets =
1195 49 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1196 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1197 49 : if (panOffsets == nullptr)
1198 0 : return 0;
1199 49 : psChild->buffers[1] = panOffsets;
1200 :
1201 49 : OffsetType nOffset = 0;
1202 49 : size_t nFeatCount = 0;
1203 138 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1204 : {
1205 91 : panOffsets[iFeat] = nOffset;
1206 91 : auto &poFeature = apoFeatures[iFeat];
1207 91 : const auto psRawField = poFeature->GetRawFieldRef(i);
1208 91 : if (IsValidField(psRawField))
1209 : {
1210 60 : const unsigned nCount = GetFromList::getCount(psRawField);
1211 60 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1212 : {
1213 2 : if (nFeatCount == 0)
1214 1 : return 0;
1215 1 : break;
1216 : }
1217 58 : nOffset += static_cast<OffsetType>(nCount);
1218 : }
1219 31 : else if (bIsNullable)
1220 : {
1221 31 : ++psChild->null_count;
1222 31 : if (pabyValidity == nullptr)
1223 : {
1224 27 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1225 27 : psChild->buffers[0] = pabyValidity;
1226 27 : if (pabyValidity == nullptr)
1227 0 : return 0;
1228 : }
1229 31 : UnsetBit(pabyValidity, iFeat);
1230 : }
1231 : }
1232 48 : panOffsets[nFeatCount] = nOffset;
1233 :
1234 48 : psChild->n_children = 1;
1235 48 : psChild->children = static_cast<struct ArrowArray **>(
1236 48 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1237 96 : psChild->children[0] = static_cast<struct ArrowArray *>(
1238 48 : CPLCalloc(1, sizeof(struct ArrowArray)));
1239 48 : auto psValueChild = psChild->children[0];
1240 :
1241 48 : psValueChild->release = OGRLayerDefaultReleaseArray;
1242 48 : psValueChild->n_buffers = 2;
1243 48 : psValueChild->buffers =
1244 48 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1245 48 : psValueChild->length = nOffset;
1246 : uint8_t *panValues = static_cast<uint8_t *>(
1247 48 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nOffset + 7 + 1) / 8));
1248 48 : if (panValues == nullptr)
1249 0 : return 0;
1250 48 : memset(panValues, 0, (nOffset + 7) / 8);
1251 48 : psValueChild->buffers[1] = panValues;
1252 :
1253 48 : nOffset = 0;
1254 138 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1255 : {
1256 90 : auto &poFeature = apoFeatures[iFeat];
1257 90 : const auto psRawField = poFeature->GetRawFieldRef(i);
1258 90 : if (IsValidField(psRawField))
1259 : {
1260 59 : const int nCount = GetFromList::getCount(psRawField);
1261 59 : const auto paList = GetFromList::getValues(psRawField);
1262 :
1263 373 : for (int j = 0; j < nCount; ++j)
1264 : {
1265 314 : if (paList[j])
1266 55 : SetBit(panValues, nOffset + j);
1267 : }
1268 59 : nOffset += static_cast<OffsetType>(nCount);
1269 : }
1270 : }
1271 :
1272 48 : return nFeatCount;
1273 : }
1274 :
1275 : /************************************************************************/
1276 : /* FillStringArray() */
1277 : /************************************************************************/
1278 :
1279 : template <class T>
1280 : static size_t
1281 3772 : FillStringArray(struct ArrowArray *psChild,
1282 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1283 : const size_t nFeatureCountLimit, const bool bIsNullable,
1284 : const int i, const size_t nMemLimit)
1285 : {
1286 3772 : psChild->n_buffers = 3;
1287 3772 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1288 3772 : uint8_t *pabyValidity = nullptr;
1289 : T *panOffsets = static_cast<T *>(
1290 3772 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1291 3772 : if (panOffsets == nullptr)
1292 0 : return 0;
1293 3772 : psChild->buffers[1] = panOffsets;
1294 :
1295 3772 : size_t nOffset = 0;
1296 3772 : size_t nFeatCount = 0;
1297 33982 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1298 : {
1299 30230 : panOffsets[iFeat] = static_cast<T>(nOffset);
1300 30230 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1301 30230 : if (IsValidField(psRawField))
1302 : {
1303 26890 : const size_t nLen = strlen(psRawField->String);
1304 26890 : if (nLen > nMemLimit - nOffset)
1305 : {
1306 20 : if (nFeatCount == 0)
1307 19 : return 0;
1308 1 : break;
1309 : }
1310 26870 : nOffset += static_cast<T>(nLen);
1311 : }
1312 3340 : else if (bIsNullable)
1313 : {
1314 3340 : ++psChild->null_count;
1315 3340 : if (pabyValidity == nullptr)
1316 : {
1317 1131 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1318 1131 : psChild->buffers[0] = pabyValidity;
1319 1131 : if (pabyValidity == nullptr)
1320 0 : return 0;
1321 : }
1322 3340 : UnsetBit(pabyValidity, iFeat);
1323 : }
1324 : }
1325 3753 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1326 :
1327 : char *pachValues =
1328 3753 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1329 3753 : if (pachValues == nullptr)
1330 0 : return 0;
1331 3753 : psChild->buffers[2] = pachValues;
1332 :
1333 3753 : nOffset = 0;
1334 33963 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1335 : {
1336 30210 : const size_t nLen =
1337 30210 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1338 30210 : if (nLen)
1339 : {
1340 25318 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1341 25318 : memcpy(pachValues + nOffset, psRawField->String, nLen);
1342 25318 : nOffset += nLen;
1343 : }
1344 : }
1345 :
1346 3753 : return nFeatCount;
1347 : }
1348 :
1349 : /************************************************************************/
1350 : /* FillStringListArray() */
1351 : /************************************************************************/
1352 :
1353 : template <class OffsetType>
1354 : static size_t
1355 203 : FillStringListArray(struct ArrowArray *psChild,
1356 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1357 : const size_t nFeatureCountLimit, const bool bIsNullable,
1358 : const int i, const size_t nMemLimit)
1359 : {
1360 203 : psChild->n_buffers = 2;
1361 203 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1362 203 : uint8_t *pabyValidity = nullptr;
1363 : OffsetType *panOffsets =
1364 203 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1365 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1366 203 : if (panOffsets == nullptr)
1367 0 : return false;
1368 203 : psChild->buffers[1] = panOffsets;
1369 :
1370 203 : OffsetType nStrings = 0;
1371 203 : OffsetType nCountChars = 0;
1372 203 : size_t nFeatCount = 0;
1373 516 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1374 : {
1375 315 : panOffsets[iFeat] = nStrings;
1376 315 : auto &poFeature = apoFeatures[iFeat];
1377 315 : const auto psRawField = poFeature->GetRawFieldRef(i);
1378 315 : if (IsValidField(psRawField))
1379 : {
1380 108 : const int nCount = psRawField->StringList.nCount;
1381 108 : if (static_cast<size_t>(nCount) >
1382 108 : static_cast<size_t>(nMemLimit - nStrings))
1383 : {
1384 0 : if (nFeatCount == 0)
1385 0 : return 0;
1386 0 : goto after_loop;
1387 : }
1388 280 : for (int j = 0; j < nCount; ++j)
1389 : {
1390 174 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1391 174 : if (nLen > static_cast<size_t>(nMemLimit - nCountChars))
1392 : {
1393 2 : if (nFeatCount == 0)
1394 1 : return 0;
1395 1 : goto after_loop;
1396 : }
1397 172 : nCountChars += static_cast<OffsetType>(nLen);
1398 : }
1399 106 : nStrings += static_cast<OffsetType>(nCount);
1400 : }
1401 207 : else if (bIsNullable)
1402 : {
1403 207 : ++psChild->null_count;
1404 207 : if (pabyValidity == nullptr)
1405 : {
1406 152 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1407 152 : psChild->buffers[0] = pabyValidity;
1408 152 : if (pabyValidity == nullptr)
1409 0 : return 0;
1410 : }
1411 207 : UnsetBit(pabyValidity, iFeat);
1412 : }
1413 : }
1414 201 : after_loop:
1415 202 : panOffsets[nFeatCount] = nStrings;
1416 :
1417 202 : psChild->n_children = 1;
1418 202 : psChild->children = static_cast<struct ArrowArray **>(
1419 202 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1420 404 : psChild->children[0] = static_cast<struct ArrowArray *>(
1421 202 : CPLCalloc(1, sizeof(struct ArrowArray)));
1422 202 : auto psValueChild = psChild->children[0];
1423 :
1424 202 : psValueChild->release = OGRLayerDefaultReleaseArray;
1425 202 : psValueChild->length = nStrings;
1426 202 : psValueChild->n_buffers = 3;
1427 202 : psValueChild->buffers =
1428 202 : static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1429 :
1430 : OffsetType *panChildOffsets = static_cast<OffsetType *>(
1431 202 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(OffsetType) * (1 + nStrings)));
1432 202 : if (panChildOffsets == nullptr)
1433 0 : return 0;
1434 202 : psValueChild->buffers[1] = panChildOffsets;
1435 :
1436 : char *pachValues =
1437 202 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars + 1));
1438 202 : if (pachValues == nullptr)
1439 0 : return 0;
1440 202 : psValueChild->buffers[2] = pachValues;
1441 :
1442 202 : nStrings = 0;
1443 202 : nCountChars = 0;
1444 515 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1445 : {
1446 313 : auto &poFeature = apoFeatures[iFeat];
1447 313 : const auto psRawField = poFeature->GetRawFieldRef(i);
1448 313 : if (IsValidField(psRawField))
1449 : {
1450 106 : const int nCount = psRawField->StringList.nCount;
1451 278 : for (int j = 0; j < nCount; ++j)
1452 : {
1453 172 : panChildOffsets[nStrings] = nCountChars;
1454 172 : ++nStrings;
1455 172 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1456 172 : memcpy(pachValues + nCountChars,
1457 172 : psRawField->StringList.paList[j], nLen);
1458 172 : nCountChars += static_cast<OffsetType>(nLen);
1459 : }
1460 : }
1461 : }
1462 202 : panChildOffsets[nStrings] = nCountChars;
1463 :
1464 202 : return nFeatCount;
1465 : }
1466 :
1467 : /************************************************************************/
1468 : /* FillBinaryArray() */
1469 : /************************************************************************/
1470 :
1471 : template <class T>
1472 : static size_t
1473 905 : FillBinaryArray(struct ArrowArray *psChild,
1474 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1475 : const size_t nFeatureCountLimit, const bool bIsNullable,
1476 : const int i, const size_t nMemLimit)
1477 : {
1478 905 : psChild->n_buffers = 3;
1479 905 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1480 905 : uint8_t *pabyValidity = nullptr;
1481 : T *panOffsets = static_cast<T *>(
1482 905 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1483 905 : if (panOffsets == nullptr)
1484 0 : return 0;
1485 905 : psChild->buffers[1] = panOffsets;
1486 :
1487 905 : T nOffset = 0;
1488 905 : size_t nFeatCount = 0;
1489 4362 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1490 : {
1491 3459 : panOffsets[iFeat] = nOffset;
1492 3459 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1493 3459 : if (IsValidField(psRawField))
1494 : {
1495 3402 : const size_t nLen = psRawField->Binary.nCount;
1496 3402 : if (nLen > static_cast<size_t>(nMemLimit - nOffset))
1497 : {
1498 2 : if (iFeat == 0)
1499 1 : return 0;
1500 1 : break;
1501 : }
1502 3400 : nOffset += static_cast<T>(nLen);
1503 : }
1504 57 : else if (bIsNullable)
1505 : {
1506 57 : ++psChild->null_count;
1507 57 : if (pabyValidity == nullptr)
1508 : {
1509 49 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1510 49 : psChild->buffers[0] = pabyValidity;
1511 49 : if (pabyValidity == nullptr)
1512 0 : return 0;
1513 : }
1514 57 : UnsetBit(pabyValidity, iFeat);
1515 : }
1516 : }
1517 904 : panOffsets[nFeatCount] = nOffset;
1518 :
1519 : GByte *pabyValues =
1520 904 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1521 904 : if (pabyValues == nullptr)
1522 0 : return 0;
1523 904 : psChild->buffers[2] = pabyValues;
1524 :
1525 904 : nOffset = 0;
1526 4361 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1527 : {
1528 3457 : const size_t nLen =
1529 3457 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1530 3457 : if (nLen)
1531 : {
1532 3400 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1533 3400 : memcpy(pabyValues + nOffset, psRawField->Binary.paData, nLen);
1534 3400 : nOffset += static_cast<T>(nLen);
1535 : }
1536 : }
1537 :
1538 904 : return nFeatCount;
1539 : }
1540 :
1541 : /************************************************************************/
1542 : /* FillFixedWidthBinaryArray() */
1543 : /************************************************************************/
1544 :
1545 : static bool
1546 8 : FillFixedWidthBinaryArray(struct ArrowArray *psChild,
1547 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1548 : const size_t nFeatureCountLimit,
1549 : const bool bIsNullable, const int nWidth, const int i)
1550 : {
1551 8 : psChild->n_buffers = 2;
1552 8 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1553 8 : uint8_t *pabyValidity = nullptr;
1554 :
1555 8 : assert(nFeatureCountLimit + 1 <=
1556 : std::numeric_limits<size_t>::max() / nWidth);
1557 : GByte *pabyValues = static_cast<GByte *>(
1558 8 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 1) * nWidth));
1559 8 : if (pabyValues == nullptr)
1560 0 : return false;
1561 8 : psChild->buffers[1] = pabyValues;
1562 :
1563 29 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1564 : {
1565 21 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1566 21 : if (IsValidField(psRawField))
1567 : {
1568 20 : const auto nLen = psRawField->Binary.nCount;
1569 20 : if (nLen < nWidth)
1570 : {
1571 0 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1572 : nLen);
1573 0 : memset(pabyValues + iFeat * nWidth + nLen, 0, nWidth - nLen);
1574 : }
1575 : else
1576 : {
1577 20 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1578 : nWidth);
1579 : }
1580 : }
1581 : else
1582 : {
1583 1 : memset(pabyValues + iFeat * nWidth, 0, nWidth);
1584 1 : if (bIsNullable)
1585 : {
1586 1 : ++psChild->null_count;
1587 1 : if (pabyValidity == nullptr)
1588 : {
1589 1 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1590 1 : psChild->buffers[0] = pabyValidity;
1591 1 : if (pabyValidity == nullptr)
1592 0 : return false;
1593 : }
1594 1 : UnsetBit(pabyValidity, iFeat);
1595 : }
1596 : }
1597 : }
1598 :
1599 8 : return true;
1600 : }
1601 :
1602 : /************************************************************************/
1603 : /* FillWKBGeometryArray() */
1604 : /************************************************************************/
1605 :
1606 : template <class T>
1607 : static size_t
1608 1257 : FillWKBGeometryArray(struct ArrowArray *psChild,
1609 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1610 : const size_t nFeatureCountLimit,
1611 : const OGRGeomFieldDefn *poFieldDefn, const int i,
1612 : const size_t nMemLimit)
1613 : {
1614 1257 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
1615 1257 : psChild->n_buffers = 3;
1616 1257 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1617 1257 : uint8_t *pabyValidity = nullptr;
1618 : T *panOffsets = static_cast<T *>(
1619 1257 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1620 1257 : if (panOffsets == nullptr)
1621 0 : return 0;
1622 1257 : psChild->buffers[1] = panOffsets;
1623 1257 : const auto eGeomType = poFieldDefn->GetType();
1624 3771 : auto poEmptyGeom =
1625 : std::unique_ptr<OGRGeometry>(OGRGeometryFactory::createGeometry(
1626 1257 : (eGeomType == wkbNone || wkbFlatten(eGeomType) == wkbUnknown)
1627 : ? wkbGeometryCollection
1628 : : eGeomType));
1629 :
1630 1257 : size_t nOffset = 0;
1631 1257 : size_t nFeatCount = 0;
1632 14401 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1633 : {
1634 13145 : panOffsets[iFeat] = static_cast<T>(nOffset);
1635 13145 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1636 13145 : if (poGeom != nullptr)
1637 : {
1638 12570 : const size_t nLen = poGeom->WkbSize();
1639 12570 : if (nLen > nMemLimit - nOffset)
1640 : {
1641 1 : if (nFeatCount == 0)
1642 0 : return 0;
1643 1 : break;
1644 : }
1645 12569 : nOffset += static_cast<T>(nLen);
1646 : }
1647 575 : else if (bIsNullable)
1648 : {
1649 575 : ++psChild->null_count;
1650 575 : if (pabyValidity == nullptr)
1651 : {
1652 277 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1653 277 : psChild->buffers[0] = pabyValidity;
1654 277 : if (pabyValidity == nullptr)
1655 0 : return 0;
1656 : }
1657 575 : UnsetBit(pabyValidity, iFeat);
1658 : }
1659 0 : else if (poEmptyGeom)
1660 : {
1661 0 : const size_t nLen = poEmptyGeom->WkbSize();
1662 0 : if (nLen > nMemLimit - nOffset)
1663 : {
1664 0 : if (nFeatCount == 0)
1665 0 : return 0;
1666 0 : break;
1667 : }
1668 0 : nOffset += static_cast<T>(nLen);
1669 : }
1670 : }
1671 1257 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1672 :
1673 : GByte *pabyValues =
1674 1257 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1675 1257 : if (pabyValues == nullptr)
1676 0 : return 0;
1677 1257 : psChild->buffers[2] = pabyValues;
1678 :
1679 1257 : nOffset = 0;
1680 14401 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1681 : {
1682 13144 : const size_t nLen =
1683 13144 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1684 13144 : if (nLen)
1685 : {
1686 12569 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1687 12569 : poGeom->exportToWkb(wkbNDR, pabyValues + nOffset, wkbVariantIso);
1688 12569 : nOffset += nLen;
1689 : }
1690 575 : else if (!bIsNullable && poEmptyGeom)
1691 : {
1692 0 : poEmptyGeom->exportToWkb(wkbNDR, pabyValues + nOffset,
1693 : wkbVariantIso);
1694 0 : nOffset += nLen;
1695 : }
1696 : }
1697 :
1698 1257 : return nFeatCount;
1699 : }
1700 :
1701 : /************************************************************************/
1702 : /* FillDateArray() */
1703 : /************************************************************************/
1704 :
1705 125 : static bool FillDateArray(struct ArrowArray *psChild,
1706 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1707 : const size_t nFeatureCountLimit,
1708 : const bool bIsNullable, const int i)
1709 : {
1710 125 : psChild->n_buffers = 2;
1711 125 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1712 125 : uint8_t *pabyValidity = nullptr;
1713 125 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1714 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1715 125 : if (panValues == nullptr)
1716 0 : return false;
1717 125 : psChild->buffers[1] = panValues;
1718 475 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1719 : {
1720 350 : auto &poFeature = apoFeatures[iFeat];
1721 350 : const auto psRawField = poFeature->GetRawFieldRef(i);
1722 350 : if (IsValidField(psRawField))
1723 : {
1724 : struct tm brokenDown;
1725 262 : memset(&brokenDown, 0, sizeof(brokenDown));
1726 262 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1727 262 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1728 262 : brokenDown.tm_mday = psRawField->Date.Day;
1729 262 : panValues[iFeat] =
1730 262 : static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
1731 : }
1732 88 : else if (bIsNullable)
1733 : {
1734 88 : panValues[iFeat] = 0;
1735 88 : ++psChild->null_count;
1736 88 : if (pabyValidity == nullptr)
1737 : {
1738 61 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1739 61 : psChild->buffers[0] = pabyValidity;
1740 61 : if (pabyValidity == nullptr)
1741 0 : return false;
1742 : }
1743 88 : UnsetBit(pabyValidity, iFeat);
1744 : }
1745 : else
1746 : {
1747 0 : panValues[iFeat] = 0;
1748 : }
1749 : }
1750 125 : return true;
1751 : }
1752 :
1753 : /************************************************************************/
1754 : /* FillTimeArray() */
1755 : /************************************************************************/
1756 :
1757 72 : static bool FillTimeArray(struct ArrowArray *psChild,
1758 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1759 : const size_t nFeatureCountLimit,
1760 : const bool bIsNullable, const int i)
1761 : {
1762 72 : psChild->n_buffers = 2;
1763 72 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1764 72 : uint8_t *pabyValidity = nullptr;
1765 72 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1766 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1767 72 : if (panValues == nullptr)
1768 0 : return false;
1769 72 : psChild->buffers[1] = panValues;
1770 667 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1771 : {
1772 595 : auto &poFeature = apoFeatures[iFeat];
1773 595 : const auto psRawField = poFeature->GetRawFieldRef(i);
1774 595 : if (IsValidField(psRawField))
1775 : {
1776 548 : panValues[iFeat] =
1777 548 : psRawField->Date.Hour * 3600000 +
1778 548 : psRawField->Date.Minute * 60000 +
1779 548 : static_cast<int>(psRawField->Date.Second * 1000 + 0.5f);
1780 : }
1781 47 : else if (bIsNullable)
1782 : {
1783 47 : panValues[iFeat] = 0;
1784 47 : ++psChild->null_count;
1785 47 : if (pabyValidity == nullptr)
1786 : {
1787 39 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1788 39 : psChild->buffers[0] = pabyValidity;
1789 39 : if (pabyValidity == nullptr)
1790 0 : return false;
1791 : }
1792 47 : UnsetBit(pabyValidity, iFeat);
1793 : }
1794 : else
1795 : {
1796 0 : panValues[iFeat] = 0;
1797 : }
1798 : }
1799 72 : return true;
1800 : }
1801 :
1802 : /************************************************************************/
1803 : /* FillDateTimeArray() */
1804 : /************************************************************************/
1805 :
1806 : static bool
1807 712 : FillDateTimeArray(struct ArrowArray *psChild,
1808 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1809 : const size_t nFeatureCountLimit, const bool bIsNullable,
1810 : const int i, int nFieldTZFlag)
1811 : {
1812 712 : psChild->n_buffers = 2;
1813 712 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1814 712 : uint8_t *pabyValidity = nullptr;
1815 712 : int64_t *panValues = static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1816 : sizeof(int64_t) * (nFeatureCountLimit + 1)));
1817 712 : if (panValues == nullptr)
1818 0 : return false;
1819 712 : psChild->buffers[1] = panValues;
1820 : struct tm brokenDown;
1821 712 : memset(&brokenDown, 0, sizeof(brokenDown));
1822 3141 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1823 : {
1824 2429 : auto &poFeature = apoFeatures[iFeat];
1825 2429 : const auto psRawField = poFeature->GetRawFieldRef(i);
1826 2429 : if (IsValidField(psRawField))
1827 : {
1828 1670 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1829 1670 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1830 1670 : brokenDown.tm_mday = psRawField->Date.Day;
1831 1670 : brokenDown.tm_hour = psRawField->Date.Hour;
1832 1670 : brokenDown.tm_min = psRawField->Date.Minute;
1833 1670 : brokenDown.tm_sec = static_cast<int>(psRawField->Date.Second);
1834 : auto nVal =
1835 1670 : CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
1836 1670 : (static_cast<int>(psRawField->Date.Second * 1000 + 0.5f) %
1837 1670 : 1000);
1838 1670 : if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
1839 65 : psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1840 : {
1841 : // Convert for psRawField->Date.TZFlag to UTC
1842 65 : const int TZOffset =
1843 65 : (psRawField->Date.TZFlag - OGR_TZFLAG_UTC) * 15;
1844 65 : const int TZOffsetMS = TZOffset * 60 * 1000;
1845 65 : nVal -= TZOffsetMS;
1846 : }
1847 1670 : panValues[iFeat] = nVal;
1848 : }
1849 759 : else if (bIsNullable)
1850 : {
1851 759 : panValues[iFeat] = 0;
1852 759 : ++psChild->null_count;
1853 759 : if (pabyValidity == nullptr)
1854 : {
1855 261 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1856 261 : psChild->buffers[0] = pabyValidity;
1857 261 : if (pabyValidity == nullptr)
1858 0 : return false;
1859 : }
1860 759 : UnsetBit(pabyValidity, iFeat);
1861 : }
1862 : else
1863 : {
1864 0 : panValues[iFeat] = 0;
1865 : }
1866 : }
1867 712 : return true;
1868 : }
1869 :
1870 : /************************************************************************/
1871 : /* FillDateTimeArrayWithTimeZone() */
1872 : /************************************************************************/
1873 :
1874 3 : static bool FillDateTimeArrayWithTimeZone(
1875 : struct ArrowArray *psChild,
1876 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1877 : const size_t nFeatureCountLimit, const bool bIsNullable, const int i,
1878 : int nFieldTZFlag)
1879 : {
1880 3 : psChild->n_children = 2;
1881 3 : psChild->children = static_cast<struct ArrowArray **>(
1882 3 : CPLCalloc(2, sizeof(struct ArrowArray *)));
1883 3 : psChild->n_buffers = 1;
1884 3 : psChild->buffers = static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
1885 3 : uint8_t *pabyValidity = nullptr;
1886 :
1887 : // Create sub-array for timestamp in UTC
1888 6 : psChild->children[0] = static_cast<struct ArrowArray *>(
1889 3 : CPLCalloc(1, sizeof(struct ArrowArray)));
1890 3 : psChild->children[0]->n_buffers = 2;
1891 6 : psChild->children[0]->buffers =
1892 3 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1893 3 : psChild->children[0]->release = OGRLayerDefaultReleaseArray;
1894 : int64_t *panTimestamps = static_cast<int64_t *>(
1895 3 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int64_t) * nFeatureCountLimit));
1896 3 : if (panTimestamps == nullptr)
1897 0 : return false;
1898 3 : psChild->children[0]->buffers[1] = panTimestamps;
1899 :
1900 : // Create sub-array for offset to UTC in minutes
1901 6 : psChild->children[1] = static_cast<struct ArrowArray *>(
1902 3 : CPLCalloc(1, sizeof(struct ArrowArray)));
1903 3 : psChild->children[1]->n_buffers = 2;
1904 6 : psChild->children[1]->buffers =
1905 3 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1906 3 : psChild->children[1]->release = OGRLayerDefaultReleaseArray;
1907 : int16_t *panOffsetsMinutes = static_cast<int16_t *>(
1908 3 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int16_t) * nFeatureCountLimit));
1909 3 : if (panOffsetsMinutes == nullptr)
1910 0 : return false;
1911 3 : psChild->children[1]->buffers[1] = panOffsetsMinutes;
1912 :
1913 : struct tm brokenDown;
1914 3 : memset(&brokenDown, 0, sizeof(brokenDown));
1915 :
1916 15 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1917 : {
1918 12 : auto &poFeature = apoFeatures[iFeat];
1919 12 : const auto psRawField = poFeature->GetRawFieldRef(i);
1920 12 : panTimestamps[iFeat] = 0;
1921 12 : panOffsetsMinutes[iFeat] = 0;
1922 12 : if (IsValidField(psRawField))
1923 : {
1924 9 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1925 9 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1926 9 : brokenDown.tm_mday = psRawField->Date.Day;
1927 9 : brokenDown.tm_hour = psRawField->Date.Hour;
1928 9 : brokenDown.tm_min = psRawField->Date.Minute;
1929 9 : brokenDown.tm_sec = static_cast<int>(psRawField->Date.Second);
1930 : auto nVal =
1931 9 : CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
1932 9 : (static_cast<int>(psRawField->Date.Second * 1000 + 0.5f) %
1933 9 : 1000);
1934 9 : if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
1935 9 : psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1936 : {
1937 : // Convert for psRawField->Date.TZFlag to UTC
1938 6 : const int TZOffsetMinute =
1939 6 : (psRawField->Date.TZFlag - OGR_TZFLAG_UTC) * 15;
1940 6 : const int TZOffsetMS = TZOffsetMinute * 60 * 1000;
1941 6 : nVal -= TZOffsetMS;
1942 :
1943 6 : panOffsetsMinutes[iFeat] = static_cast<int16_t>(TZOffsetMinute);
1944 : }
1945 9 : panTimestamps[iFeat] = nVal;
1946 : }
1947 3 : else if (bIsNullable)
1948 : {
1949 3 : ++psChild->null_count;
1950 3 : if (pabyValidity == nullptr)
1951 : {
1952 3 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1953 3 : psChild->buffers[0] = pabyValidity;
1954 3 : if (pabyValidity == nullptr)
1955 0 : return false;
1956 : }
1957 3 : UnsetBit(pabyValidity, iFeat);
1958 : }
1959 : }
1960 3 : return true;
1961 : }
1962 :
1963 : /************************************************************************/
1964 : /* FillDateTimeArrayAsString() */
1965 : /************************************************************************/
1966 :
1967 : static size_t
1968 9 : FillDateTimeArrayAsString(struct ArrowArray *psChild,
1969 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1970 : const size_t nFeatureCountLimit,
1971 : const bool bIsNullable, const int i,
1972 : const size_t nMemLimit)
1973 : {
1974 9 : psChild->n_buffers = 3;
1975 9 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1976 9 : uint8_t *pabyValidity = nullptr;
1977 : using T = uint32_t;
1978 : T *panOffsets = static_cast<T *>(
1979 9 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1980 9 : if (panOffsets == nullptr)
1981 0 : return 0;
1982 9 : psChild->buffers[1] = panOffsets;
1983 :
1984 9 : size_t nOffset = 0;
1985 9 : size_t nFeatCount = 0;
1986 51 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1987 : {
1988 42 : panOffsets[iFeat] = static_cast<T>(nOffset);
1989 42 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1990 42 : if (IsValidField(psRawField))
1991 : {
1992 39 : size_t nLen = strlen("YYYY-MM-DDTHH:MM:SS");
1993 39 : if (fmodf(psRawField->Date.Second, 1.0f) != 0)
1994 27 : nLen += strlen(".sss");
1995 39 : if (psRawField->Date.TZFlag == OGR_TZFLAG_UTC)
1996 7 : nLen += 1; // 'Z'
1997 32 : else if (psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1998 14 : nLen += strlen("+hh:mm");
1999 39 : if (nLen > nMemLimit - nOffset)
2000 : {
2001 0 : if (nFeatCount == 0)
2002 0 : return 0;
2003 0 : break;
2004 : }
2005 39 : nOffset += static_cast<T>(nLen);
2006 : }
2007 3 : else if (bIsNullable)
2008 : {
2009 3 : ++psChild->null_count;
2010 3 : if (pabyValidity == nullptr)
2011 : {
2012 3 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
2013 3 : psChild->buffers[0] = pabyValidity;
2014 3 : if (pabyValidity == nullptr)
2015 0 : return 0;
2016 : }
2017 3 : UnsetBit(pabyValidity, iFeat);
2018 : }
2019 : }
2020 9 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
2021 :
2022 : char *pachValues =
2023 9 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
2024 9 : if (pachValues == nullptr)
2025 0 : return 0;
2026 9 : psChild->buffers[2] = pachValues;
2027 :
2028 9 : nOffset = 0;
2029 : char szBuffer[OGR_SIZEOF_ISO8601_DATETIME_BUFFER];
2030 : OGRISO8601Format sFormat;
2031 9 : sFormat.ePrecision = OGRISO8601Precision::AUTO;
2032 51 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
2033 : {
2034 42 : const int nLen =
2035 42 : static_cast<int>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
2036 42 : if (nLen)
2037 : {
2038 39 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
2039 39 : int nBufSize = OGRGetISO8601DateTime(psRawField, sFormat, szBuffer);
2040 39 : if (nBufSize)
2041 : {
2042 39 : memcpy(pachValues + nOffset, szBuffer,
2043 39 : std::min(nLen, nBufSize));
2044 : }
2045 39 : if (nBufSize < nLen)
2046 : {
2047 5 : memset(pachValues + nOffset + nBufSize, 0, nLen - nBufSize);
2048 : }
2049 39 : nOffset += nLen;
2050 : }
2051 : }
2052 :
2053 9 : return nFeatCount;
2054 : }
2055 :
2056 : /************************************************************************/
2057 : /* GetNextArrowArray() */
2058 : /************************************************************************/
2059 :
2060 : /** Default implementation of the ArrowArrayStream::get_next() callback.
2061 : *
2062 : * To be used by driver implementations that have a custom GetArrowStream()
2063 : * implementation.
2064 : *
2065 : * @since GDAL 3.6
2066 : */
2067 3622 : int OGRLayer::GetNextArrowArray(struct ArrowArrayStream *stream,
2068 : struct ArrowArray *out_array)
2069 : {
2070 3622 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
2071 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2072 : stream->private_data);
2073 :
2074 3622 : const bool bIncludeFID = CPLTestBool(
2075 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
2076 3622 : const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
2077 : GAS_OPT_DATETIME_AS_STRING, false);
2078 3622 : int nMaxBatchSize = atoi(m_aosArrowArrayStreamOptions.FetchNameValueDef(
2079 : "MAX_FEATURES_IN_BATCH", "65536"));
2080 3622 : if (nMaxBatchSize <= 0)
2081 0 : nMaxBatchSize = 1;
2082 3622 : if (nMaxBatchSize > INT_MAX - 1)
2083 0 : nMaxBatchSize = INT_MAX - 1;
2084 :
2085 : auto &oFeatureQueue =
2086 3622 : m_poSharedArrowArrayStreamPrivateData->m_oFeatureQueue;
2087 :
2088 3622 : memset(out_array, 0, sizeof(*out_array));
2089 :
2090 3622 : auto poLayerDefn = GetLayerDefn();
2091 3622 : const int nFieldCount = poLayerDefn->GetFieldCount();
2092 3622 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
2093 3622 : const int nMaxChildren =
2094 3622 : (bIncludeFID ? 1 : 0) + nFieldCount + nGeomFieldCount;
2095 3622 : int iSchemaChild = 0;
2096 :
2097 3622 : if (!m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.empty())
2098 : {
2099 6 : if (poPrivate->poShared->m_bEOF)
2100 : {
2101 2 : return 0;
2102 : }
2103 4 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS == 0)
2104 : {
2105 4 : CPLDebug("OGR", "Using fast FID filtering");
2106 : }
2107 8 : while (
2108 24 : oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize) &&
2109 12 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS <
2110 12 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
2111 : {
2112 : const auto nFID =
2113 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
2114 8 : [m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS];
2115 16 : auto poFeature = std::unique_ptr<OGRFeature>(GetFeature(nFID));
2116 8 : ++m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS;
2117 8 : if (poFeature && (m_poFilterGeom == nullptr ||
2118 0 : FilterGeometry(poFeature->GetGeomFieldRef(
2119 8 : m_iGeomFieldFilter))))
2120 : {
2121 4 : oFeatureQueue.emplace_back(std::move(poFeature));
2122 : }
2123 : }
2124 8 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS ==
2125 4 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
2126 : {
2127 4 : poPrivate->poShared->m_bEOF = true;
2128 : }
2129 : }
2130 3616 : else if (!poPrivate->poShared->m_bEOF)
2131 : {
2132 18932 : while (oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize))
2133 : {
2134 18929 : auto poFeature = std::unique_ptr<OGRFeature>(GetNextFeature());
2135 18929 : if (!poFeature)
2136 : {
2137 1862 : poPrivate->poShared->m_bEOF = true;
2138 1862 : break;
2139 : }
2140 17067 : oFeatureQueue.emplace_back(std::move(poFeature));
2141 : }
2142 : }
2143 3620 : if (oFeatureQueue.empty())
2144 : {
2145 2143 : return 0;
2146 : }
2147 :
2148 1477 : out_array->release = OGRLayerDefaultReleaseArray;
2149 1477 : out_array->null_count = 0;
2150 :
2151 1477 : out_array->n_children = nMaxChildren;
2152 1477 : out_array->children = static_cast<struct ArrowArray **>(
2153 1477 : CPLCalloc(nMaxChildren, sizeof(struct ArrowArray *)));
2154 1477 : out_array->release = OGRLayerDefaultReleaseArray;
2155 1477 : out_array->n_buffers = 1;
2156 1477 : out_array->buffers =
2157 1477 : static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
2158 :
2159 1477 : size_t nFeatureCount = oFeatureQueue.size();
2160 1477 : const uint32_t nMemLimit = OGRArrowArrayHelper::GetMemLimit();
2161 2954 : std::set<int> anArrayIndicesOfStructDateTime;
2162 1477 : if (bIncludeFID)
2163 : {
2164 2686 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2165 1343 : CPLCalloc(1, sizeof(struct ArrowArray)));
2166 1343 : auto psChild = out_array->children[iSchemaChild];
2167 1343 : ++iSchemaChild;
2168 1343 : psChild->release = OGRLayerDefaultReleaseArray;
2169 1343 : psChild->n_buffers = 2;
2170 1343 : psChild->buffers =
2171 1343 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
2172 : int64_t *panValues =
2173 1343 : static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
2174 : sizeof(int64_t) * (oFeatureQueue.size() + 1)));
2175 1343 : if (panValues == nullptr)
2176 0 : goto error;
2177 1343 : psChild->buffers[1] = panValues;
2178 17962 : for (size_t iFeat = 0; iFeat < oFeatureQueue.size(); ++iFeat)
2179 : {
2180 16619 : panValues[iFeat] = oFeatureQueue[iFeat]->GetFID();
2181 : }
2182 : }
2183 :
2184 13698 : for (int i = 0; i < nFieldCount; ++i)
2185 : {
2186 12246 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
2187 12246 : if (poFieldDefn->IsIgnored())
2188 : {
2189 13 : continue;
2190 : }
2191 :
2192 24466 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2193 12233 : CPLCalloc(1, sizeof(struct ArrowArray)));
2194 12233 : auto psChild = out_array->children[iSchemaChild];
2195 12233 : psChild->release = OGRLayerDefaultReleaseArray;
2196 12233 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
2197 12233 : const auto eSubType = poFieldDefn->GetSubType();
2198 12233 : switch (poFieldDefn->GetType())
2199 : {
2200 3528 : case OFTInteger:
2201 : {
2202 3528 : if (eSubType == OFSTBoolean)
2203 : {
2204 138 : if (!FillBoolArray(psChild, oFeatureQueue, nFeatureCount,
2205 : bIsNullable, &OGRField::Integer, i))
2206 0 : goto error;
2207 : }
2208 3390 : else if (eSubType == OFSTInt16)
2209 : {
2210 478 : if (!FillArray<int16_t>(psChild, oFeatureQueue,
2211 : nFeatureCount, bIsNullable,
2212 : &OGRField::Integer, i))
2213 0 : goto error;
2214 : }
2215 : else
2216 : {
2217 2912 : if (!FillArray<int32_t>(psChild, oFeatureQueue,
2218 : nFeatureCount, bIsNullable,
2219 : &OGRField::Integer, i))
2220 0 : goto error;
2221 : }
2222 :
2223 3528 : const auto &osDomainName = poFieldDefn->GetDomainName();
2224 3528 : if (!osDomainName.empty())
2225 : {
2226 13 : auto poDS = GetDataset();
2227 13 : if (poDS)
2228 : {
2229 : const auto poFieldDomain =
2230 13 : poDS->GetFieldDomain(osDomainName);
2231 26 : if (poFieldDomain &&
2232 13 : poFieldDomain->GetDomainType() == OFDT_CODED)
2233 : {
2234 13 : const OGRCodedFieldDomain *poCodedDomain =
2235 : static_cast<const OGRCodedFieldDomain *>(
2236 : poFieldDomain);
2237 13 : OGRArrowArrayHelper::FillDict(psChild,
2238 : poCodedDomain);
2239 : }
2240 : }
2241 : }
2242 :
2243 3528 : break;
2244 : }
2245 :
2246 333 : case OFTInteger64:
2247 : {
2248 333 : if (!FillArray<int64_t>(psChild, oFeatureQueue, nFeatureCount,
2249 : bIsNullable, &OGRField::Integer64, i))
2250 0 : goto error;
2251 333 : break;
2252 : }
2253 :
2254 2098 : case OFTReal:
2255 : {
2256 2098 : if (eSubType == OFSTFloat32)
2257 : {
2258 478 : if (!FillArray<float>(psChild, oFeatureQueue, nFeatureCount,
2259 : bIsNullable, &OGRField::Real, i))
2260 0 : goto error;
2261 : }
2262 : else
2263 : {
2264 1620 : if (!FillArray<double>(psChild, oFeatureQueue,
2265 : nFeatureCount, bIsNullable,
2266 : &OGRField::Real, i))
2267 0 : goto error;
2268 : }
2269 2098 : break;
2270 : }
2271 :
2272 3772 : case OFTString:
2273 : case OFTWideString:
2274 : {
2275 3772 : const size_t nThisFeatureCount = FillStringArray<int32_t>(
2276 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2277 : nMemLimit);
2278 3772 : if (nThisFeatureCount == 0)
2279 : {
2280 19 : goto error_max_mem;
2281 : }
2282 3753 : if (nThisFeatureCount < nFeatureCount)
2283 1 : nFeatureCount = nThisFeatureCount;
2284 3753 : break;
2285 : }
2286 :
2287 913 : case OFTBinary:
2288 : {
2289 913 : const int nWidth = poFieldDefn->GetWidth();
2290 913 : if (nWidth > 0)
2291 : {
2292 8 : if (nFeatureCount > nMemLimit / nWidth)
2293 : {
2294 1 : nFeatureCount = nMemLimit / nWidth;
2295 1 : if (nFeatureCount == 0)
2296 0 : goto error_max_mem;
2297 : }
2298 8 : if (!FillFixedWidthBinaryArray(psChild, oFeatureQueue,
2299 : nFeatureCount, bIsNullable,
2300 : nWidth, i))
2301 0 : goto error;
2302 : }
2303 : else
2304 : {
2305 905 : const size_t nThisFeatureCount = FillBinaryArray<int32_t>(
2306 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2307 : nMemLimit);
2308 905 : if (nThisFeatureCount == 0)
2309 : {
2310 1 : goto error_max_mem;
2311 : }
2312 904 : if (nThisFeatureCount < nFeatureCount)
2313 1 : nFeatureCount = nThisFeatureCount;
2314 : }
2315 912 : break;
2316 : }
2317 :
2318 234 : case OFTIntegerList:
2319 : {
2320 : size_t nThisFeatureCount;
2321 234 : if (eSubType == OFSTBoolean)
2322 : {
2323 : nThisFeatureCount =
2324 49 : FillListArrayBool<int32_t, GetFromIntegerList>(
2325 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2326 : i, nMemLimit);
2327 : }
2328 185 : else if (eSubType == OFSTInt16)
2329 : {
2330 : nThisFeatureCount =
2331 28 : FillListArray<int32_t, int16_t, GetFromIntegerList>(
2332 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2333 : i, nMemLimit);
2334 : }
2335 : else
2336 : {
2337 : nThisFeatureCount =
2338 157 : FillListArray<int32_t, int32_t, GetFromIntegerList>(
2339 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2340 : i, nMemLimit);
2341 : }
2342 234 : if (nThisFeatureCount == 0)
2343 : {
2344 2 : goto error_max_mem;
2345 : }
2346 232 : if (nThisFeatureCount < nFeatureCount)
2347 2 : nFeatureCount = nThisFeatureCount;
2348 232 : break;
2349 : }
2350 :
2351 75 : case OFTInteger64List:
2352 : {
2353 : const size_t nThisFeatureCount =
2354 75 : FillListArray<int32_t, int64_t, GetFromInteger64List>(
2355 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2356 : nMemLimit);
2357 75 : if (nThisFeatureCount == 0)
2358 : {
2359 1 : goto error_max_mem;
2360 : }
2361 74 : if (nThisFeatureCount < nFeatureCount)
2362 1 : nFeatureCount = nThisFeatureCount;
2363 74 : break;
2364 : }
2365 :
2366 156 : case OFTRealList:
2367 : {
2368 : size_t nThisFeatureCount;
2369 156 : if (eSubType == OFSTFloat32)
2370 : {
2371 : nThisFeatureCount =
2372 41 : FillListArray<int32_t, float, GetFromRealList>(
2373 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2374 : i, nMemLimit);
2375 : }
2376 : else
2377 : {
2378 : nThisFeatureCount =
2379 115 : FillListArray<int32_t, double, GetFromRealList>(
2380 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2381 : i, nMemLimit);
2382 : }
2383 156 : if (nThisFeatureCount == 0)
2384 : {
2385 1 : goto error_max_mem;
2386 : }
2387 155 : if (nThisFeatureCount < nFeatureCount)
2388 1 : nFeatureCount = nThisFeatureCount;
2389 155 : break;
2390 : }
2391 :
2392 203 : case OFTStringList:
2393 : case OFTWideStringList:
2394 : {
2395 203 : const size_t nThisFeatureCount = FillStringListArray<int32_t>(
2396 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2397 : nMemLimit);
2398 203 : if (nThisFeatureCount == 0)
2399 : {
2400 1 : goto error_max_mem;
2401 : }
2402 202 : if (nThisFeatureCount < nFeatureCount)
2403 1 : nFeatureCount = nThisFeatureCount;
2404 202 : break;
2405 : }
2406 :
2407 125 : case OFTDate:
2408 : {
2409 125 : if (!FillDateArray(psChild, oFeatureQueue, nFeatureCount,
2410 : bIsNullable, i))
2411 0 : goto error;
2412 125 : break;
2413 : }
2414 :
2415 72 : case OFTTime:
2416 : {
2417 72 : if (!FillTimeArray(psChild, oFeatureQueue, nFeatureCount,
2418 : bIsNullable, i))
2419 0 : goto error;
2420 72 : break;
2421 : }
2422 :
2423 724 : case OFTDateTime:
2424 : {
2425 724 : if (bDateTimeAsString)
2426 : {
2427 9 : const size_t nThisFeatureCount = FillDateTimeArrayAsString(
2428 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2429 : nMemLimit);
2430 9 : if (nThisFeatureCount == 0)
2431 : {
2432 0 : goto error_max_mem;
2433 : }
2434 9 : if (nThisFeatureCount < nFeatureCount)
2435 0 : nFeatureCount = nThisFeatureCount;
2436 : }
2437 : else
2438 : {
2439 : const char *pszTZOverride =
2440 715 : m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
2441 715 : const int nTZFlag = poFieldDefn->GetTZFlag();
2442 715 : if ((pszTZOverride && EQUAL(pszTZOverride, "mixed")) ||
2443 706 : (!pszTZOverride && nTZFlag == OGR_TZFLAG_MIXED_TZ))
2444 :
2445 : {
2446 3 : anArrayIndicesOfStructDateTime.insert(iSchemaChild);
2447 3 : if (!FillDateTimeArrayWithTimeZone(
2448 : psChild, oFeatureQueue, nFeatureCount,
2449 : bIsNullable, i, nTZFlag))
2450 : {
2451 0 : goto error;
2452 : }
2453 : }
2454 712 : else if (!FillDateTimeArray(psChild, oFeatureQueue,
2455 : nFeatureCount, bIsNullable, i,
2456 : nTZFlag))
2457 : {
2458 0 : goto error;
2459 : }
2460 : }
2461 724 : break;
2462 : }
2463 : }
2464 :
2465 12208 : ++iSchemaChild;
2466 : }
2467 2712 : for (int i = 0; i < nGeomFieldCount; ++i)
2468 : {
2469 1260 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
2470 1260 : if (poFieldDefn->IsIgnored())
2471 : {
2472 3 : continue;
2473 : }
2474 :
2475 2514 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2476 1257 : CPLCalloc(1, sizeof(struct ArrowArray)));
2477 1257 : auto psChild = out_array->children[iSchemaChild];
2478 1257 : ++iSchemaChild;
2479 1257 : psChild->release = OGRLayerDefaultReleaseArray;
2480 1257 : psChild->length = oFeatureQueue.size();
2481 1257 : const size_t nThisFeatureCount = FillWKBGeometryArray<int32_t>(
2482 : psChild, oFeatureQueue, nFeatureCount, poFieldDefn, i, nMemLimit);
2483 1257 : if (nThisFeatureCount == 0)
2484 : {
2485 0 : goto error_max_mem;
2486 : }
2487 1257 : if (nThisFeatureCount < nFeatureCount)
2488 1 : nFeatureCount = nThisFeatureCount;
2489 : }
2490 :
2491 : // Remove consumed features from the queue
2492 1452 : if (nFeatureCount == oFeatureQueue.size())
2493 1443 : oFeatureQueue.clear();
2494 : else
2495 : {
2496 27 : for (size_t i = 0; i < nFeatureCount; ++i)
2497 : {
2498 18 : oFeatureQueue.pop_front();
2499 : }
2500 : }
2501 :
2502 1452 : out_array->n_children = iSchemaChild;
2503 1452 : out_array->length = nFeatureCount;
2504 16156 : for (int i = 0; i < out_array->n_children; ++i)
2505 : {
2506 14704 : out_array->children[i]->length = nFeatureCount;
2507 14704 : if (cpl::contains(anArrayIndicesOfStructDateTime, i))
2508 : {
2509 9 : for (int j = 0; j < out_array->children[i]->n_children; ++j)
2510 : {
2511 6 : out_array->children[i]->children[j]->length = nFeatureCount;
2512 : }
2513 : }
2514 : }
2515 :
2516 1452 : return 0;
2517 :
2518 25 : error_max_mem:
2519 25 : CPLError(CE_Failure, CPLE_AppDefined,
2520 : "Too large feature: not even a single feature can be returned");
2521 25 : error:
2522 25 : oFeatureQueue.clear();
2523 25 : poPrivate->poShared->m_bEOF = true;
2524 25 : out_array->release(out_array);
2525 25 : memset(out_array, 0, sizeof(*out_array));
2526 25 : return ENOMEM;
2527 : }
2528 :
2529 : /************************************************************************/
2530 : /* StaticGetNextArrowArray() */
2531 : /************************************************************************/
2532 :
2533 : /** Default implementation of the ArrowArrayStream::get_next() callback.
2534 : *
2535 : * To be used by driver implementations that have a custom GetArrowStream()
2536 : * implementation.
2537 : *
2538 : * @since GDAL 3.6
2539 : */
2540 4616 : int OGRLayer::StaticGetNextArrowArray(struct ArrowArrayStream *stream,
2541 : struct ArrowArray *out_array)
2542 : {
2543 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2544 4616 : stream->private_data)
2545 4616 : ->poShared->m_poLayer;
2546 4616 : if (poLayer == nullptr)
2547 : {
2548 1 : CPLError(CE_Failure, CPLE_NotSupported,
2549 : "Calling get_next() on a freed OGRLayer is not supported");
2550 1 : return EINVAL;
2551 : }
2552 4615 : return poLayer->GetNextArrowArray(stream, out_array);
2553 : }
2554 :
2555 : /************************************************************************/
2556 : /* ReleaseStream() */
2557 : /************************************************************************/
2558 :
2559 : /** Release a ArrowArrayStream.
2560 : *
2561 : * To be used by driver implementations that have a custom GetArrowStream()
2562 : * implementation.
2563 : *
2564 : * @param stream Arrow array stream to release.
2565 : * @since GDAL 3.6
2566 : */
2567 2272 : void OGRLayer::ReleaseStream(struct ArrowArrayStream *stream)
2568 : {
2569 2272 : assert(stream->release == OGRLayer::ReleaseStream);
2570 2272 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
2571 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2572 : stream->private_data);
2573 2272 : poPrivate->poShared->m_bArrowArrayStreamInProgress = false;
2574 2272 : poPrivate->poShared->m_bEOF = false;
2575 2272 : if (poPrivate->poShared->m_poLayer)
2576 2222 : poPrivate->poShared->m_poLayer->ResetReading();
2577 2272 : delete poPrivate;
2578 2272 : stream->private_data = nullptr;
2579 2272 : stream->release = nullptr;
2580 2272 : }
2581 :
2582 : /************************************************************************/
2583 : /* GetLastErrorArrowArrayStream() */
2584 : /************************************************************************/
2585 :
2586 : /** Default implementation of the ArrowArrayStream::get_last_error() callback.
2587 : *
2588 : * To be used by driver implementations that have a custom GetArrowStream()
2589 : * implementation.
2590 : *
2591 : * @since GDAL 3.6
2592 : */
2593 3 : const char *OGRLayer::GetLastErrorArrowArrayStream(struct ArrowArrayStream *)
2594 : {
2595 3 : const char *pszLastErrorMsg = CPLGetLastErrorMsg();
2596 3 : return pszLastErrorMsg[0] != '\0' ? pszLastErrorMsg : nullptr;
2597 : }
2598 :
2599 : /************************************************************************/
2600 : /* GetArrowStream() */
2601 : /************************************************************************/
2602 :
2603 : /** Get an Arrow C stream.
2604 : *
2605 : * On successful return, and when the stream interface is no longer needed, it
2606 : * must be freed with out_stream->release(out_stream). Please carefully
2607 : * read https://arrow.apache.org/docs/format/CStreamInterface.html for more
2608 : * details on using Arrow C stream.
2609 : *
2610 : * The method may take into account ignored fields set with SetIgnoredFields()
2611 : * (the default implementation does), and should take into account filters set
2612 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2613 : * specialized implementations may fallback to the default (slower)
2614 : * implementation when filters are set.
2615 : * Drivers that have a specialized implementation should advertise the
2616 : * OLCFastGetArrowStream capability.
2617 : *
2618 : * There are extra precautions to take into account in a OGR context. Unless
2619 : * otherwise specified by a particular driver implementation, the get_schema(),
2620 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2621 : * structure should no longer be used after the OGRLayer, from which the
2622 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2623 : * dataset closing). The reason is that those function pointers will typically
2624 : * point to methods of the OGRLayer instance.
2625 : * However, the ArrowSchema and ArrowArray structures filled from those
2626 : * callbacks can be used and must be released independently from the
2627 : * ArrowArrayStream or the layer.
2628 : *
2629 : * Furthermore, unless otherwise specified by a particular driver
2630 : * implementation, only one ArrowArrayStream can be active at a time on
2631 : * a given layer (that is the last active one must be explicitly released before
2632 : * a next one is asked). Changing filter state, ignored columns, modifying the
2633 : * schema or using ResetReading()/GetNextFeature() while using a
2634 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2635 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2636 : * should be called on a layer, while an ArrowArrayStream on it is active.
2637 : *
2638 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2639 : * get_schema() callback may be set with the potential following items:
2640 : * <ul>
2641 : * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2642 : * Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2643 : * specified.</li>
2644 : * <li>"GDAL:OGR:alternative_name": value of
2645 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2646 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2647 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2648 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2649 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2650 : * string)</li>
2651 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2652 : * "true" or "false")</li>
2653 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2654 : * </ul>
2655 : *
2656 : * A potential usage can be:
2657 : \code{.cpp}
2658 : struct ArrowArrayStream stream;
2659 : if( !poLayer->GetArrowStream(&stream, nullptr))
2660 : {
2661 : CPLError(CE_Failure, CPLE_AppDefined, "GetArrowStream() failed\n");
2662 : exit(1);
2663 : }
2664 : struct ArrowSchema schema;
2665 : if( stream.get_schema(&stream, &schema) == 0 )
2666 : {
2667 : // Do something useful
2668 : schema.release(schema);
2669 : }
2670 : while( true )
2671 : {
2672 : struct ArrowArray array;
2673 : // Look for an error (get_next() returning a non-zero code), or
2674 : // end of iteration (array.release == nullptr)
2675 : if( stream.get_next(&stream, &array) != 0 ||
2676 : array.release == nullptr )
2677 : {
2678 : break;
2679 : }
2680 : // Do something useful
2681 : array.release(&array);
2682 : }
2683 : stream.release(&stream);
2684 : \endcode
2685 : *
2686 : * A full example is available in the
2687 : * <a
2688 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2689 : From OGR using the Arrow C Stream data interface</a> tutorial.
2690 : *
2691 : * Options may be driver specific. The default implementation recognizes the
2692 : * following options:
2693 : * <ul>
2694 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to YES.
2695 : * </li>
2696 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2697 : * a ArrowArray batch. Defaults to 65 536.</li>
2698 : * <li>TIMEZONE="unknown", "mixed", "UTC", "(+|-)HH:MM" or any other value supported by
2699 : * Arrow. (GDAL >= 3.8)
2700 : * Override the timezone flag nominally provided by
2701 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2702 : * declaration, with a user specified timezone.
2703 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2704 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2705 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2706 : * to UTC of a OGRField::Date is only done if both the timezone indicated by
2707 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2708 : * this TIMEZONE option) are not unknown.
2709 : * Since GDAL 3.13, "mixed" can be used to create an Arrow structure field,
2710 : * following the "timestamp with offset" extension (https://github.com/apache/arrow/blob/main/docs/source/format/CanonicalExtensions.rst#timestamp-with-offset)
2711 : * and storing both a UTC timestamp and the offset in minutes from the UTC
2712 : * timezone.
2713 : * </li>
2714 : * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2715 : * Whether DateTime fields should be returned as a (normally ISO-8601
2716 : * formatted) string by drivers. The aim is to be able to handle mixed
2717 : * timezones (or timezone naive values) in the same column.
2718 : * All drivers must honour that option, and potentially fallback to the
2719 : * OGRLayer generic implementation if they cannot (which is the case for the
2720 : * Arrow, Parquet and ADBC drivers).
2721 : * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2722 : * </li>
2723 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2724 : * The default is OGC, which will lead to setting
2725 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2726 : * If setting GEOMETRY_METADATA_ENCODING to GEOARROW,
2727 : * ARROW:extension:name=geoarrow.wkb and
2728 : * ARROW:extension:metadata={"crs": <projjson CRS representation>} are set.
2729 : * </li>
2730 : * </ul>
2731 : *
2732 : * The Arrow/Parquet drivers recognize the following option:
2733 : * <ul>
2734 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2735 : * when the native geometry encoding is not WKB. Otherwise the geometry
2736 : * will be returned with its native Arrow encoding
2737 : * (possibly using GeoArrow encoding).</li>
2738 : * </ul>
2739 : *
2740 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2741 : * structure does not need to be initialized.
2742 : * @param papszOptions NULL terminated list of key=value options.
2743 : * @return true in case of success.
2744 : * @since GDAL 3.6
2745 : */
2746 2276 : bool OGRLayer::GetArrowStream(struct ArrowArrayStream *out_stream,
2747 : CSLConstList papszOptions)
2748 : {
2749 2276 : memset(out_stream, 0, sizeof(*out_stream));
2750 3847 : if (m_poSharedArrowArrayStreamPrivateData &&
2751 3847 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress)
2752 : {
2753 4 : CPLError(CE_Failure, CPLE_AppDefined,
2754 : "An arrow Arrow Stream is in progress on that layer. Only "
2755 : "one at a time is allowed in this implementation.");
2756 4 : return false;
2757 : }
2758 2272 : m_aosArrowArrayStreamOptions.Assign(CSLDuplicate(papszOptions), true);
2759 :
2760 2272 : out_stream->get_schema = OGRLayer::StaticGetArrowSchema;
2761 2272 : out_stream->get_next = OGRLayer::StaticGetNextArrowArray;
2762 2272 : out_stream->get_last_error = OGRLayer::GetLastErrorArrowArrayStream;
2763 2272 : out_stream->release = OGRLayer::ReleaseStream;
2764 :
2765 2272 : if (m_poSharedArrowArrayStreamPrivateData == nullptr)
2766 : {
2767 : m_poSharedArrowArrayStreamPrivateData =
2768 705 : std::make_shared<ArrowArrayStreamPrivateData>();
2769 705 : m_poSharedArrowArrayStreamPrivateData->m_poLayer = this;
2770 : }
2771 2272 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress = true;
2772 :
2773 : // Special case for "FID = constant", or "FID IN (constant1, ...., constantN)"
2774 2272 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.clear();
2775 2272 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS = 0;
2776 2272 : if (m_poAttrQuery)
2777 : {
2778 : swq_expr_node *poNode =
2779 1076 : static_cast<swq_expr_node *>(m_poAttrQuery->GetSWQExpr());
2780 3228 : if (poNode->eNodeType == SNT_OPERATION &&
2781 1076 : (poNode->nOperation == SWQ_IN || poNode->nOperation == SWQ_EQ) &&
2782 842 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
2783 291 : poNode->papoSubExpr[0]->field_index ==
2784 2161 : GetLayerDefn()->GetFieldCount() + SPF_FID &&
2785 9 : TestCapability(OLCRandomRead))
2786 : {
2787 8 : std::set<GIntBig> oSetAlreadyListed;
2788 13 : for (int i = 1; i < poNode->nSubExprCount; ++i)
2789 : {
2790 27 : if (poNode->papoSubExpr[i]->eNodeType == SNT_CONSTANT &&
2791 18 : poNode->papoSubExpr[i]->field_type == SWQ_INTEGER64 &&
2792 9 : oSetAlreadyListed.find(poNode->papoSubExpr[i]->int_value) ==
2793 18 : oSetAlreadyListed.end())
2794 : {
2795 8 : oSetAlreadyListed.insert(poNode->papoSubExpr[i]->int_value);
2796 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
2797 8 : .push_back(poNode->papoSubExpr[i]->int_value);
2798 : }
2799 : }
2800 : }
2801 : }
2802 :
2803 2272 : auto poPrivateData = new ArrowArrayStreamPrivateDataSharedDataWrapper();
2804 2272 : poPrivateData->poShared = m_poSharedArrowArrayStreamPrivateData;
2805 2272 : out_stream->private_data = poPrivateData;
2806 2272 : return true;
2807 : }
2808 :
2809 : /************************************************************************/
2810 : /* OGR_L_GetArrowStream() */
2811 : /************************************************************************/
2812 :
2813 : /** Get an Arrow C stream.
2814 : *
2815 : * On successful return, and when the stream interface is no longer needed, it
2816 : * must be freed with out_stream->release(out_stream). Please carefully read
2817 : * https://arrow.apache.org/docs/format/CStreamInterface.html for more details
2818 : * on using Arrow C stream.
2819 : *
2820 : * The method may take into account ignored fields set with SetIgnoredFields()
2821 : * (the default implementation does), and should take into account filters set
2822 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2823 : * specialized implementations may fallback to the default (slower)
2824 : * implementation when filters are set.
2825 : * Drivers that have a specialized implementation should
2826 : * advertise the OLCFastGetArrowStream capability.
2827 : *
2828 : * There are extra precautions to take into account in a OGR context. Unless
2829 : * otherwise specified by a particular driver implementation, the get_schema(),
2830 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2831 : * structure should no longer be used after the OGRLayer, from which the
2832 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2833 : * dataset closing). The reason is that those function pointers will typically
2834 : * point to methods of the OGRLayer instance.
2835 : * However, the ArrowSchema and ArrowArray structures filled from those
2836 : * callbacks can be used and must be released independently from the
2837 : * ArrowArrayStream or the layer.
2838 : *
2839 : * Furthermore, unless otherwise specified by a particular driver
2840 : * implementation, only one ArrowArrayStream can be active at a time on
2841 : * a given layer (that is the last active one must be explicitly released before
2842 : * a next one is asked). Changing filter state, ignored columns, modifying the
2843 : * schema or using ResetReading()/GetNextFeature() while using a
2844 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2845 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2846 : * should be called on a layer, while an ArrowArrayStream on it is active.
2847 : *
2848 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2849 : * get_schema() callback may be set with the potential following items:
2850 : * <ul>
2851 : * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2852 : * Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2853 : * specified.</li>
2854 : * <li>"GDAL:OGR:alternative_name": value of
2855 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2856 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2857 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2858 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2859 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2860 : * string)</li>
2861 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2862 : * "true" or "false")</li>
2863 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2864 : * </ul>
2865 : *
2866 : * A potential usage can be:
2867 : \code{.cpp}
2868 : struct ArrowArrayStream stream;
2869 : if( !OGR_L_GetArrowStream(hLayer, &stream, nullptr))
2870 : {
2871 : CPLError(CE_Failure, CPLE_AppDefined,
2872 : "OGR_L_GetArrowStream() failed\n");
2873 : exit(1);
2874 : }
2875 : struct ArrowSchema schema;
2876 : if( stream.get_schema(&stream, &schema) == 0 )
2877 : {
2878 : // Do something useful
2879 : schema.release(&schema);
2880 : }
2881 : while( true )
2882 : {
2883 : struct ArrowArray array;
2884 : // Look for an error (get_next() returning a non-zero code), or
2885 : // end of iteration (array.release == nullptr)
2886 : if( stream.get_next(&stream, &array) != 0 ||
2887 : array.release == nullptr )
2888 : {
2889 : break;
2890 : }
2891 : // Do something useful
2892 : array.release(&array);
2893 : }
2894 : stream.release(&stream);
2895 : \endcode
2896 : *
2897 : * A full example is available in the
2898 : * <a
2899 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2900 : From OGR using the Arrow C Stream data interface</a> tutorial.
2901 : *
2902 : * Options may be driver specific. The default implementation recognizes the
2903 : * following options:
2904 : * <ul>
2905 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to
2906 : YES.</li>
2907 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2908 : * a ArrowArray batch. Defaults to 65 536.</li>
2909 : * <li>TIMEZONE="unknown", "mixed", "UTC", "(+|-)HH:MM" or any other value supported by
2910 : * Arrow. (GDAL >= 3.8)
2911 : * Override the timezone flag nominally provided by
2912 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2913 : * declaration, with a user specified timezone.
2914 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2915 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2916 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2917 : * to UTC of a OGRField::Date is only done if both the timezone indicated by
2918 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2919 : * this TIMEZONE option) are not unknown.
2920 : * Since GDAL 3.13, "mixed" can be used to create an Arrow structure field,
2921 : * following the "timestamp with offset" extension (https://github.com/apache/arrow/blob/main/docs/source/format/CanonicalExtensions.rst#timestamp-with-offset)
2922 : * and storing both a UTC timestamp and the offset in minutes from the UTC
2923 : * timezone.
2924 : * </li>
2925 : * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2926 : * Whether DateTime fields should be returned as a (normally ISO-8601
2927 : * formatted) string by drivers. The aim is to be able to handle mixed
2928 : * timezones (or timezone naive values) in the same column.
2929 : * All drivers must honour that option, and potentially fallback to the
2930 : * OGRLayer generic implementation if they cannot (which is the case for the
2931 : * Arrow, Parquet and ADBC drivers).
2932 : * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2933 : * </li>
2934 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2935 : * The default is OGC, which will lead to setting
2936 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2937 : * If setting GEOMETRY_METADATA_ENCODING to GEOARROW,
2938 : * ARROW:extension:name=geoarrow.wkb and
2939 : * ARROW:extension:metadata={"crs": <projjson CRS representation>} are set.
2940 : * </li>
2941 : * </ul>
2942 : *
2943 : * The Arrow/Parquet drivers recognize the following option:
2944 : * <ul>
2945 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2946 : * when the native geometry encoding is not WKB. Otherwise the geometry
2947 : * will be returned with its native Arrow encoding
2948 : * (possibly using GeoArrow encoding).</li>
2949 : * </ul>
2950 : *
2951 : * @param hLayer Layer
2952 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2953 : * structure does not need to be initialized.
2954 : * @param papszOptions NULL terminated list of key=value options.
2955 : * @return true in case of success.
2956 : * @since GDAL 3.6
2957 : */
2958 375 : bool OGR_L_GetArrowStream(OGRLayerH hLayer, struct ArrowArrayStream *out_stream,
2959 : CSLConstList papszOptions)
2960 : {
2961 375 : VALIDATE_POINTER1(hLayer, "OGR_L_GetArrowStream", false);
2962 375 : VALIDATE_POINTER1(out_stream, "OGR_L_GetArrowStream", false);
2963 :
2964 750 : return OGRLayer::FromHandle(hLayer)->GetArrowStream(out_stream,
2965 375 : papszOptions);
2966 : }
2967 :
2968 : /************************************************************************/
2969 : /* OGRParseArrowMetadata() */
2970 : /************************************************************************/
2971 :
/** Decode the binary key/value metadata blob of an ArrowSchema.
 *
 * The layout read here is: a int32 number of key/value pairs, followed for
 * each pair by a int32 key length, the key bytes, a int32 value length and
 * the value bytes. Integers are read with memcpy(), i.e. in native byte
 * order and without any alignment requirement.
 *
 * @param pabyMetadata Metadata blob (typically ArrowSchema::metadata).
 *                     May be NULL, in which case an empty map is returned.
 * @return the decoded items. If a key is repeated, the last value wins.
 *         Decoding stops at the first negative key or value length.
 */
std::map<std::string, std::string>
OGRParseArrowMetadata(const char *pabyMetadata)
{
    std::map<std::string, std::string> oMetadata;

    // ArrowSchema::metadata is optional: treat NULL as "no metadata"
    // rather than dereferencing it.
    if (pabyMetadata == nullptr)
        return oMetadata;

    // Read a int32 at the current position and advance past it.
    const auto ReadInt32 = [&pabyMetadata]()
    {
        int32_t nVal;
        memcpy(&nVal, pabyMetadata, sizeof(nVal));
        pabyMetadata += sizeof(nVal);
        return nVal;
    };

    const int32_t nKVP = ReadInt32();
    for (int32_t i = 0; i < nKVP; ++i)
    {
        const int32_t nSizeKey = ReadInt32();
        // A negative length can only come from a corrupted blob: it must not
        // be converted to a huge size_t.
        if (nSizeKey < 0)
            break;
        std::string osKey(pabyMetadata, static_cast<size_t>(nSizeKey));
        pabyMetadata += nSizeKey;

        const int32_t nSizeValue = ReadInt32();
        if (nSizeValue < 0)
            break;
        std::string osValue(pabyMetadata, static_cast<size_t>(nSizeValue));
        pabyMetadata += nSizeValue;

        oMetadata[osKey] = std::move(osValue);
    }

    return oMetadata;
}
3000 :
3001 : /************************************************************************/
3002 : /* GetStringAsStringView() */
3003 : /************************************************************************/
3004 :
3005 : template <typename OffsetType>
3006 125 : static std::string_view GetStringAsStringView(const struct ArrowArray *array,
3007 : const size_t nIdx)
3008 : {
3009 125 : const OffsetType *panOffsets =
3010 125 : static_cast<const OffsetType *>(array->buffers[1]) +
3011 125 : static_cast<size_t>(array->offset) + nIdx;
3012 125 : const char *pchStr = reinterpret_cast<const char *>(array->buffers[2]);
3013 : if constexpr (std::is_same_v<OffsetType, uint64_t>)
3014 : {
3015 82 : if (panOffsets[1] - panOffsets[0] >
3016 41 : std::numeric_limits<size_t>::max() - 1)
3017 : {
3018 0 : CPLError(CE_Failure, CPLE_AppDefined, "Too large string");
3019 0 : return std::string_view();
3020 : }
3021 : }
3022 125 : return std::string_view(pchStr + static_cast<size_t>(panOffsets[0]),
3023 125 : static_cast<size_t>(panOffsets[1] - panOffsets[0]));
3024 : }
3025 :
3026 : /************************************************************************/
3027 : /* GetStringView() */
3028 : /************************************************************************/
3029 :
3030 0 : static std::string_view GetStringView(const struct ArrowArray *array,
3031 : const size_t nIdx)
3032 : {
3033 : // Cf https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout
3034 0 : const uint32_t *panStringView =
3035 0 : static_cast<const uint32_t *>(array->buffers[1]) +
3036 0 : (static_cast<size_t>(array->offset) + nIdx) * N_VALUES_PER_STRING_VIEW;
3037 0 : constexpr int IDX_LENGTH = 0;
3038 0 : constexpr int IDX_PREFIX_OR_DATA = 1;
3039 0 : constexpr int IDX_BUFFER_IDX = 2;
3040 0 : constexpr int IDX_OFFSET = 3;
3041 0 : const uint32_t nLength = panStringView[IDX_LENGTH];
3042 0 : const char *pchPrefixOrStr =
3043 : reinterpret_cast<const char *>(panStringView + IDX_PREFIX_OR_DATA);
3044 0 : if (nLength <= 12)
3045 : {
3046 0 : return std::string_view(pchPrefixOrStr, nLength);
3047 : }
3048 : else
3049 : {
3050 0 : const uint32_t nBufferIdx = panStringView[IDX_BUFFER_IDX];
3051 0 : const uint32_t nOffset = panStringView[IDX_OFFSET];
3052 0 : constexpr int BASE_BUFFER_IDX = 2;
3053 0 : CPLAssert(BASE_BUFFER_IDX + nBufferIdx < array->n_buffers);
3054 0 : std::string_view s(static_cast<const char *>(
3055 0 : array->buffers[BASE_BUFFER_IDX + nBufferIdx]) +
3056 0 : nOffset,
3057 0 : nLength);
3058 : #ifdef DEBUG
3059 : // cppcheck-suppress unreadVariable
3060 0 : constexpr int PREFIX_LENGTH = 4;
3061 0 : CPLAssert(memcmp(s.data(), pchPrefixOrStr, PREFIX_LENGTH) == 0);
3062 : #endif
3063 0 : return s;
3064 : }
3065 : }
3066 :
3067 : /************************************************************************/
3068 : /* ParseDecimalFormat() */
3069 : /************************************************************************/
3070 :
/** Parse an Arrow decimal format string.
 *
 * Recognized forms:
 * - "d:19,10"     ==> decimal128 [precision 19, scale 10]
 * - "d:19,10,NNN" ==> decimal of bit width NNN [precision 19, scale 10]
 *
 * @param format Arrow format string, expected to start with "d:".
 * @param[out] nPrecision Parsed precision (0 on failure before parsing).
 * @param[out] nScale Parsed scale (0 on failure before parsing).
 * @param[out] nWidthInBytes Width of a value in bytes. Defaults to 16 when
 *                           no bit width is specified. Set to 0 on failure.
 * @return true on success. false if the string is too short, has no comma,
 *         or has a bit width that is not a strictly positive multiple of 8.
 */
static bool ParseDecimalFormat(const char *format, int &nPrecision, int &nScale,
                               int &nWidthInBytes)
{
    nPrecision = 0;
    nScale = 0;
    nWidthInBytes = 128 / 8;  // 128 bit

    // The parameters start after the 2-character "d:" prefix: make sure we
    // do not look past the end of a shorter string.
    if (format[0] == '\0' || format[1] == '\0')
    {
        nWidthInBytes = 0;
        return false;
    }

    const char *pszFirstComma = strchr(format + 2, ',');
    if (!pszFirstComma)
    {
        // shouldn't happen for well-format schemas
        nWidthInBytes = 0;
        return false;
    }

    nPrecision = atoi(format + 2);
    nScale = atoi(pszFirstComma + 1);

    const char *pszSecondComma = strchr(pszFirstComma + 1, ',');
    if (pszSecondComma)
    {
        const int nWidthInBits = atoi(pszSecondComma + 1);
        // Zero (including a non-numeric value), negative or non byte-aligned
        // widths are meaningless. Test the sign first, as e.g. -8 % 8 == 0.
        if (nWidthInBits <= 0 || (nWidthInBits % 8) != 0)
        {
            // shouldn't happen for well-format schemas
            nWidthInBytes = 0;
            return false;
        }
        nWidthInBytes = nWidthInBits / 8;
    }
    return true;
}
3108 :
3109 : /************************************************************************/
3110 : /* GetErrorIfUnsupportedDecimal() */
3111 : /************************************************************************/
3112 :
/** Check whether a decimal field with the given characteristics is supported.
 *
 * @param nWidthInBytes Width of a value, in bytes.
 * @param nPrecision Decimal precision.
 * @return nullptr if supported, otherwise a static error message. The width
 *         is checked before the precision.
 */
static const char *GetErrorIfUnsupportedDecimal(int nWidthInBytes,
                                                int nPrecision)
{
    constexpr int DECIMAL128_WIDTH_IN_BYTES = 128 / 8;
    constexpr int DECIMAL256_WIDTH_IN_BYTES = 256 / 8;
    // precision=19 fits on 64 bits
    constexpr int MAX_SUPPORTED_PRECISION = 19;

    const bool bWidthSupported = nWidthInBytes == DECIMAL128_WIDTH_IN_BYTES ||
                                 nWidthInBytes == DECIMAL256_WIDTH_IN_BYTES;
    if (!bWidthSupported)
        return "For decimal field, only width 128 and 256 are supported";

    const bool bPrecisionSupported =
        nPrecision >= 1 && nPrecision <= MAX_SUPPORTED_PRECISION;
    if (!bPrecisionSupported)
        return "For decimal field, only precision up to 19 is supported";

    return nullptr;
}
3130 :
3131 : /************************************************************************/
3132 : /* IsArrowTimeStampWithOffsetField() */
3133 : /************************************************************************/
3134 :
3135 1260 : static bool IsArrowTimeStampWithOffsetField(const struct ArrowSchema *schema)
3136 : {
3137 : bool ret =
3138 3377 : IsStructure(schema->format) && schema->n_children == 2 &&
3139 857 : IsTimestamp(schema->children[0]->format) &&
3140 0 : IsInt16(schema->children[1]->format) &&
3141 2520 : strcmp(schema->children[0]->name, ATSWO_TIMESTAMP_FIELD_NAME) == 0 &&
3142 0 : strcmp(schema->children[1]->name, ATSWO_OFFSET_MINUTES_FIELD_NAME) == 0;
3143 1260 : if (ret)
3144 : {
3145 0 : const auto oMetadata = OGRParseArrowMetadata(schema->metadata);
3146 0 : const auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
3147 0 : ret = oIter != oMetadata.end() &&
3148 0 : oIter->second == EXTENSION_NAME_ARROW_TIMESTAMP_WITH_OFFSET;
3149 : }
3150 1260 : return ret;
3151 : }
3152 :
3153 : /************************************************************************/
3154 : /* IsHandledSchema() */
3155 : /************************************************************************/
3156 :
3157 15760 : static bool IsHandledSchema(bool bTopLevel, const struct ArrowSchema *schema,
3158 : const std::string &osPrefix, bool bHasAttrQuery,
3159 : const CPLStringList &aosUsedFields)
3160 : {
3161 15760 : const char *format = schema->format;
3162 15760 : if (IsStructure(format))
3163 : {
3164 1246 : if (IsArrowTimeStampWithOffsetField(schema) &&
3165 1246 : aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3166 : {
3167 0 : return false;
3168 : }
3169 :
3170 12285 : for (int64_t i = 0; i < schema->n_children; ++i)
3171 : {
3172 44156 : if (!IsHandledSchema(/* bTopLevel = */ false,
3173 11039 : schema->children[static_cast<size_t>(i)],
3174 24772 : bTopLevel ? std::string()
3175 13733 : : osPrefix + schema->name + ".",
3176 : bHasAttrQuery, aosUsedFields))
3177 : {
3178 0 : return false;
3179 : }
3180 : }
3181 1246 : return true;
3182 : }
3183 :
3184 : // Lists or maps
3185 25169 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format) ||
3186 10655 : IsMap(format))
3187 : {
3188 4566 : if (!IsHandledSchema(/* bTopLevel = */ false, schema->children[0],
3189 : osPrefix, bHasAttrQuery, aosUsedFields))
3190 : {
3191 0 : return false;
3192 : }
3193 : // For now, we can't filter on lists or maps
3194 4566 : if (aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3195 : {
3196 0 : CPLDebug("OGR",
3197 : "Field %s has unhandled format '%s' for an "
3198 : "attribute to filter on",
3199 0 : (osPrefix + schema->name).c_str(), format);
3200 0 : return false;
3201 : }
3202 4566 : return true;
3203 : }
3204 :
3205 9948 : const char *const apszHandledFormats[] = {
3206 : "b", // boolean
3207 : "c", // int8
3208 : "C", // uint8
3209 : "s", // int16
3210 : "S", // uint16
3211 : "i", // int32
3212 : "I", // uint32
3213 : "l", // int64
3214 : "L", // uint64
3215 : "e", // float16
3216 : "f", // float32
3217 : "g", // float64,
3218 : "z", // binary
3219 : "Z", // large binary
3220 : "u", // UTF-8 string
3221 : "U", // large UTF-8 string
3222 : "tdD", // date32[days]
3223 : "tdm", // date64[milliseconds]
3224 : "tts", //time32 [seconds]
3225 : "ttm", //time32 [milliseconds]
3226 : "ttu", //time64 [microseconds]
3227 : "ttn", //time64 [nanoseconds]
3228 : };
3229 :
3230 115231 : for (const char *pszHandledFormat : apszHandledFormats)
3231 : {
3232 113923 : if (strcmp(format, pszHandledFormat) == 0)
3233 : {
3234 8640 : return true;
3235 : }
3236 : }
3237 :
3238 1308 : if (IsDecimal(format))
3239 : {
3240 790 : if (bHasAttrQuery &&
3241 790 : aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3242 : {
3243 2 : int nPrecision = 0;
3244 2 : int nScale = 0;
3245 2 : int nWidthInBytes = 0;
3246 2 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
3247 : {
3248 0 : CPLDebug("OGR", "%s",
3249 0 : (std::string("Invalid field format ") + format +
3250 0 : " for field " + osPrefix + schema->name)
3251 : .c_str());
3252 0 : return false;
3253 : }
3254 :
3255 : const char *pszError =
3256 2 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
3257 2 : if (pszError)
3258 : {
3259 0 : CPLDebug("OGR", "%s", pszError);
3260 0 : return false;
3261 : }
3262 : }
3263 412 : return true;
3264 : }
3265 :
3266 896 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
3267 : {
3268 896 : return true;
3269 : }
3270 :
3271 0 : CPLDebug("OGR", "Field %s has unhandled format '%s'",
3272 0 : (osPrefix + schema->name).c_str(), format);
3273 0 : return false;
3274 : }
3275 :
3276 : /************************************************************************/
3277 : /* OGRLayer::CanPostFilterArrowArray() */
3278 : /************************************************************************/
3279 :
3280 : /** Whether the PostFilterArrowArray() can work on the schema to remove
3281 : * rows that aren't selected by the spatial or attribute filter.
3282 : */
3283 155 : bool OGRLayer::CanPostFilterArrowArray(const struct ArrowSchema *schema) const
3284 : {
3285 155 : if (!IsHandledSchema(
3286 155 : /* bTopLevel=*/true, schema, std::string(),
3287 155 : m_poAttrQuery != nullptr,
3288 310 : m_poAttrQuery ? CPLStringList(m_poAttrQuery->GetUsedFields())
3289 : : CPLStringList()))
3290 : {
3291 0 : return false;
3292 : }
3293 :
3294 155 : if (m_poFilterGeom)
3295 : {
3296 22 : bool bFound = false;
3297 : const char *pszGeomFieldName =
3298 : const_cast<OGRLayer *>(this)
3299 22 : ->GetLayerDefn()
3300 22 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
3301 22 : ->GetNameRef();
3302 839 : for (int64_t i = 0; i < schema->n_children; ++i)
3303 : {
3304 839 : const auto fieldSchema = schema->children[i];
3305 839 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
3306 : {
3307 23 : if (!IsBinary(fieldSchema->format) &&
3308 1 : !IsLargeBinary(fieldSchema->format))
3309 : {
3310 1 : CPLDebug("OGR", "Geometry field %s has handled format '%s'",
3311 : fieldSchema->name, fieldSchema->format);
3312 1 : return false;
3313 : }
3314 :
3315 : // Check if ARROW:extension:name = ogc.wkb
3316 21 : const char *pabyMetadata = fieldSchema->metadata;
3317 21 : if (!pabyMetadata)
3318 : {
3319 0 : CPLDebug(
3320 : "OGR",
3321 : "Geometry field %s lacks metadata in its schema field",
3322 : fieldSchema->name);
3323 0 : return false;
3324 : }
3325 :
3326 21 : const auto oMetadata = OGRParseArrowMetadata(pabyMetadata);
3327 21 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
3328 21 : if (oIter == oMetadata.end())
3329 : {
3330 0 : CPLDebug("OGR",
3331 : "Geometry field %s lacks "
3332 : "%s metadata "
3333 : "in its schema field",
3334 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY);
3335 0 : return false;
3336 : }
3337 21 : if (oIter->second != EXTENSION_NAME_OGC_WKB &&
3338 0 : oIter->second != EXTENSION_NAME_GEOARROW_WKB)
3339 : {
3340 0 : CPLDebug("OGR",
3341 : "Geometry field %s has unexpected "
3342 : "%s = '%s' metadata "
3343 : "in its schema field",
3344 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY,
3345 0 : oIter->second.c_str());
3346 0 : return false;
3347 : }
3348 :
3349 21 : bFound = true;
3350 21 : break;
3351 : }
3352 : }
3353 21 : if (!bFound)
3354 : {
3355 0 : CPLDebug("OGR", "Cannot find geometry field %s in schema",
3356 : pszGeomFieldName);
3357 0 : return false;
3358 : }
3359 : }
3360 :
3361 154 : return true;
3362 : }
3363 :
#if 0
/************************************************************************/
/*                      CheckValidityBuffer()                           */
/************************************************************************/

/** Debugging helper (currently compiled out): assert that array->null_count,
 * when it is known (>= 0), agrees with the number of unset bits of the
 * validity buffer (buffers[0]) over the [offset, offset + length) range.
 */
static void CheckValidityBuffer(const struct ArrowArray *array)
{
    // A negative null_count means it has not been computed: nothing to check
    if (array->null_count < 0)
        return;

    const uint8_t *pabyValidity =
        static_cast<const uint8_t *>(const_cast<const void *>(array->buffers[0]));
    if (pabyValidity == nullptr)
    {
        // No validity buffer: there must be no null value
        CPLAssert(array->null_count == 0);
        return;
    }

    const size_t nFirstBit = static_cast<size_t>(array->offset);
    const size_t nBitCount = static_cast<size_t>(array->length);
    size_t nUnsetBits = 0;
    for (size_t iBit = 0; iBit < nBitCount; ++iBit)
    {
        if (!TestBit(pabyValidity, nFirstBit + iBit))
            ++nUnsetBits;
    }
    CPLAssert(static_cast<size_t>(array->null_count) == nUnsetBits);
}
#endif
3390 :
3391 : /************************************************************************/
3392 : /* CompactValidityBuffer() */
3393 : /************************************************************************/
3394 :
3395 7682 : static void CompactValidityBuffer(
3396 : const struct ArrowSchema *, struct ArrowArray *array, size_t iStart,
3397 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3398 : {
3399 : // Invalidate null_count as the same validity buffer may be used when
3400 : // scrolling batches, and this creates confusion if we try to set it
3401 : // to different values among the batches
3402 7682 : if (array->null_count <= 0)
3403 : {
3404 4186 : array->null_count = -1;
3405 4186 : return;
3406 : }
3407 3496 : array->null_count = -1;
3408 :
3409 3496 : CPLAssert(static_cast<size_t>(array->length) >=
3410 : iStart + abyValidityFromFilters.size());
3411 3496 : uint8_t *pabyValidity =
3412 3496 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[0]));
3413 3496 : const size_t nLength = abyValidityFromFilters.size();
3414 3496 : const size_t nOffset = static_cast<size_t>(array->offset);
3415 3496 : size_t j = iStart + nOffset;
3416 12883 : for (size_t i = 0; i < nLength && j < nNewLength + nOffset; ++i)
3417 : {
3418 9387 : if (abyValidityFromFilters[i])
3419 : {
3420 5823 : if (TestBit(pabyValidity, i + iStart + nOffset))
3421 4387 : SetBit(pabyValidity, j);
3422 : else
3423 1436 : UnsetBit(pabyValidity, j);
3424 5823 : ++j;
3425 : }
3426 : }
3427 : }
3428 :
3429 : /************************************************************************/
3430 : /* CompactBoolArray() */
3431 : /************************************************************************/
3432 :
3433 224 : static void CompactBoolArray(const struct ArrowSchema *schema,
3434 : struct ArrowArray *array, size_t iStart,
3435 : const std::vector<bool> &abyValidityFromFilters,
3436 : size_t nNewLength)
3437 : {
3438 224 : CPLAssert(array->n_children == 0);
3439 224 : CPLAssert(array->n_buffers == 2);
3440 224 : CPLAssert(static_cast<size_t>(array->length) >=
3441 : iStart + abyValidityFromFilters.size());
3442 :
3443 224 : const size_t nLength = abyValidityFromFilters.size();
3444 224 : const size_t nOffset = static_cast<size_t>(array->offset);
3445 224 : uint8_t *pabyData =
3446 224 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[1]));
3447 224 : size_t j = iStart + nOffset;
3448 1147 : for (size_t i = 0; i < nLength; ++i)
3449 : {
3450 923 : if (abyValidityFromFilters[i])
3451 : {
3452 424 : if (TestBit(pabyData, i + iStart + nOffset))
3453 199 : SetBit(pabyData, j);
3454 : else
3455 225 : UnsetBit(pabyData, j);
3456 :
3457 424 : ++j;
3458 : }
3459 : }
3460 :
3461 224 : if (schema->flags & ARROW_FLAG_NULLABLE)
3462 224 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3463 : nNewLength);
3464 :
3465 224 : array->length = nNewLength;
3466 224 : }
3467 :
3468 : /************************************************************************/
3469 : /* CompactPrimitiveArray() */
3470 : /************************************************************************/
3471 :
3472 : template <class T>
3473 3575 : static void CompactPrimitiveArray(
3474 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3475 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3476 : {
3477 3575 : CPLAssert(array->n_children == 0);
3478 3575 : CPLAssert(array->n_buffers == 2);
3479 3575 : CPLAssert(static_cast<size_t>(array->length) >=
3480 : iStart + abyValidityFromFilters.size());
3481 :
3482 3575 : const size_t nLength = abyValidityFromFilters.size();
3483 3575 : const size_t nOffset = static_cast<size_t>(array->offset);
3484 3575 : T *paData =
3485 3575 : static_cast<T *>(const_cast<void *>(array->buffers[1])) + nOffset;
3486 3575 : size_t j = iStart;
3487 18134 : for (size_t i = 0; i < nLength; ++i)
3488 : {
3489 14559 : if (abyValidityFromFilters[i])
3490 : {
3491 6366 : paData[j] = paData[i + iStart];
3492 6366 : ++j;
3493 : }
3494 : }
3495 :
3496 3575 : if (schema->flags & ARROW_FLAG_NULLABLE)
3497 3564 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3498 : nNewLength);
3499 :
3500 3575 : array->length = nNewLength;
3501 3575 : }
3502 :
3503 : /************************************************************************/
3504 : /* CompactStringOrBinaryArray() */
3505 : /************************************************************************/
3506 :
3507 : template <class OffsetType>
3508 1187 : static void CompactStringOrBinaryArray(
3509 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3510 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3511 : {
3512 1187 : CPLAssert(array->n_children == 0);
3513 1187 : CPLAssert(array->n_buffers == 3);
3514 1187 : CPLAssert(static_cast<size_t>(array->length) >=
3515 : iStart + abyValidityFromFilters.size());
3516 :
3517 1187 : const size_t nLength = abyValidityFromFilters.size();
3518 1187 : const size_t nOffset = static_cast<size_t>(array->offset);
3519 1187 : OffsetType *panOffsets =
3520 1187 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3521 : nOffset;
3522 1187 : GByte *pabyData =
3523 1187 : static_cast<GByte *>(const_cast<void *>(array->buffers[2]));
3524 1187 : size_t j = iStart;
3525 1187 : OffsetType nCurOffset = panOffsets[iStart];
3526 5103 : for (size_t i = 0; i < nLength; ++i)
3527 : {
3528 3916 : if (abyValidityFromFilters[i])
3529 : {
3530 1768 : const auto nStartOffset = panOffsets[i + iStart];
3531 1768 : const auto nEndOffset = panOffsets[i + iStart + 1];
3532 1768 : panOffsets[j] = nCurOffset;
3533 1768 : const auto nSize = static_cast<size_t>(nEndOffset - nStartOffset);
3534 1768 : if (nSize)
3535 : {
3536 1562 : if (nCurOffset < nStartOffset)
3537 : {
3538 636 : memmove(pabyData + nCurOffset, pabyData + nStartOffset,
3539 : nSize);
3540 : }
3541 1562 : nCurOffset += static_cast<OffsetType>(nSize);
3542 : }
3543 1768 : ++j;
3544 : }
3545 : }
3546 1187 : panOffsets[j] = nCurOffset;
3547 :
3548 1187 : if (schema->flags & ARROW_FLAG_NULLABLE)
3549 806 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3550 : nNewLength);
3551 :
3552 1187 : array->length = nNewLength;
3553 1187 : }
3554 :
3555 : /************************************************************************/
3556 : /* CompactStringViewArray() */
3557 : /************************************************************************/
3558 :
3559 0 : static void CompactStringViewArray(
3560 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3561 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3562 : {
3563 0 : CPLAssert(array->n_children == 0);
3564 0 : CPLAssert(array->n_buffers >= 2);
3565 0 : const size_t nLength = abyValidityFromFilters.size();
3566 0 : CPLAssert(static_cast<size_t>(array->length) >= iStart + nLength);
3567 :
3568 : // We only compact the string view buffer, not the string content buffers.
3569 0 : const size_t nOffset = static_cast<size_t>(array->offset);
3570 : // Cf https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout
3571 0 : uint32_t *panStringView =
3572 0 : static_cast<uint32_t *>(const_cast<void *>(array->buffers[1])) +
3573 : nOffset * N_VALUES_PER_STRING_VIEW;
3574 0 : for (size_t i = 0, j = 0; i < nLength; ++i)
3575 : {
3576 0 : if (abyValidityFromFilters[i])
3577 : {
3578 0 : if (j < i)
3579 : {
3580 0 : memmove(panStringView + (j + iStart) * N_VALUES_PER_STRING_VIEW,
3581 0 : panStringView + (i + iStart) * N_VALUES_PER_STRING_VIEW,
3582 : sizeof(panStringView[0]) * N_VALUES_PER_STRING_VIEW);
3583 : }
3584 0 : ++j;
3585 : }
3586 : }
3587 :
3588 0 : if (schema->flags & ARROW_FLAG_NULLABLE)
3589 0 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3590 : nNewLength);
3591 :
3592 0 : array->length = nNewLength;
3593 0 : }
3594 :
3595 : /************************************************************************/
3596 : /* CompactFixedWidthArray() */
3597 : /************************************************************************/
3598 :
3599 : static void
3600 305 : CompactFixedWidthArray(const struct ArrowSchema *schema,
3601 : struct ArrowArray *array, int nWidth, size_t iStart,
3602 : const std::vector<bool> &abyValidityFromFilters,
3603 : size_t nNewLength)
3604 : {
3605 305 : CPLAssert(array->n_children == 0);
3606 305 : CPLAssert(array->n_buffers == 2);
3607 305 : CPLAssert(static_cast<size_t>(array->length) >=
3608 : iStart + abyValidityFromFilters.size());
3609 :
3610 305 : const size_t nLength = abyValidityFromFilters.size();
3611 305 : const size_t nOffset = static_cast<size_t>(array->offset);
3612 305 : GByte *pabyData =
3613 305 : static_cast<GByte *>(const_cast<void *>(array->buffers[1]));
3614 305 : size_t nStartOffset = (iStart + nOffset) * nWidth;
3615 305 : size_t nCurOffset = nStartOffset;
3616 1133 : for (size_t i = 0; i < nLength; ++i, nStartOffset += nWidth)
3617 : {
3618 828 : if (abyValidityFromFilters[i])
3619 : {
3620 391 : if (nCurOffset < nStartOffset)
3621 : {
3622 210 : memcpy(pabyData + nCurOffset, pabyData + nStartOffset, nWidth);
3623 : }
3624 391 : nCurOffset += nWidth;
3625 : }
3626 : }
3627 :
3628 305 : if (schema->flags & ARROW_FLAG_NULLABLE)
3629 305 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3630 : nNewLength);
3631 :
3632 305 : array->length = nNewLength;
3633 305 : }
3634 :
3635 : /************************************************************************/
3636 : /* CompactStructArray() */
3637 : /************************************************************************/
3638 :
3639 : static bool CompactArray(const struct ArrowSchema *schema,
3640 : struct ArrowArray *array, size_t iStart,
3641 : const std::vector<bool> &abyValidityFromFilters,
3642 : size_t nNewLength);
3643 :
3644 665 : static bool CompactStructArray(const struct ArrowSchema *schema,
3645 : struct ArrowArray *array, size_t iStart,
3646 : const std::vector<bool> &abyValidityFromFilters,
3647 : size_t nNewLength)
3648 : {
3649 : // The equality might not be strict in the case of when some sub-arrays
3650 : // are fully void !
3651 665 : CPLAssert(array->n_children <= schema->n_children);
3652 6718 : for (int64_t iField = 0; iField < array->n_children; ++iField)
3653 : {
3654 6053 : const auto psChildSchema = schema->children[iField];
3655 6053 : const auto psChildArray = array->children[iField];
3656 : // To please Arrow validation...
3657 6053 : const size_t nChildNewLength =
3658 6053 : static_cast<size_t>(array->offset) + nNewLength;
3659 6053 : if (psChildArray->length > array->length)
3660 : {
3661 120 : std::vector<bool> abyChildValidity(abyValidityFromFilters);
3662 120 : abyChildValidity.resize(
3663 120 : abyValidityFromFilters.size() +
3664 120 : static_cast<size_t>(psChildArray->length - array->length),
3665 : false);
3666 120 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3667 : abyChildValidity, nChildNewLength))
3668 : {
3669 0 : return false;
3670 : }
3671 : }
3672 : else
3673 : {
3674 5933 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3675 : abyValidityFromFilters, nChildNewLength))
3676 : {
3677 0 : return false;
3678 : }
3679 : }
3680 6053 : CPLAssert(psChildArray->length ==
3681 : static_cast<int64_t>(nChildNewLength));
3682 : }
3683 :
3684 665 : if (schema->flags & ARROW_FLAG_NULLABLE)
3685 201 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3686 : nNewLength);
3687 :
3688 665 : array->length = nNewLength;
3689 :
3690 665 : return true;
3691 : }
3692 :
3693 : /************************************************************************/
3694 : /* InvalidateNullCountRec() */
3695 : /************************************************************************/
3696 :
3697 570 : static void InvalidateNullCountRec(const struct ArrowSchema *schema,
3698 : struct ArrowArray *array)
3699 : {
3700 570 : if (schema->flags & ARROW_FLAG_NULLABLE)
3701 210 : array->null_count = -1;
3702 960 : for (int i = 0; i < array->n_children; ++i)
3703 390 : InvalidateNullCountRec(schema->children[i], array->children[i]);
3704 570 : }
3705 :
3706 : /************************************************************************/
3707 : /* CompactListArray() */
3708 : /************************************************************************/
3709 :
3710 : template <class OffsetType>
3711 1773 : static bool CompactListArray(const struct ArrowSchema *schema,
3712 : struct ArrowArray *array, size_t iStart,
3713 : const std::vector<bool> &abyValidityFromFilters,
3714 : size_t nNewLength)
3715 : {
3716 1773 : CPLAssert(static_cast<size_t>(array->length) >=
3717 : iStart + abyValidityFromFilters.size());
3718 1773 : CPLAssert(array->n_children == 1);
3719 1773 : CPLAssert(array->n_buffers == 2);
3720 :
3721 1773 : const auto psChildSchema = schema->children[0];
3722 1773 : const auto psChildArray = array->children[0];
3723 :
3724 1773 : const size_t nLength = abyValidityFromFilters.size();
3725 1773 : const size_t nOffset = static_cast<size_t>(array->offset);
3726 1773 : OffsetType *panOffsets =
3727 1773 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3728 : nOffset;
3729 :
3730 1773 : if (panOffsets[iStart + nLength] > panOffsets[iStart])
3731 : {
3732 3186 : std::vector<bool> abyChildValidity(
3733 1593 : static_cast<size_t>(panOffsets[iStart + nLength] -
3734 1593 : panOffsets[iStart]),
3735 : true);
3736 1593 : size_t j = iStart;
3737 1593 : OffsetType nCurOffset = panOffsets[iStart];
3738 6694 : for (size_t i = 0; i < nLength; ++i)
3739 : {
3740 5101 : if (abyValidityFromFilters[i])
3741 : {
3742 2142 : const auto nSize =
3743 2142 : panOffsets[i + iStart + 1] - panOffsets[i + iStart];
3744 2142 : panOffsets[j] = nCurOffset;
3745 2142 : nCurOffset += nSize;
3746 2142 : ++j;
3747 : }
3748 : else
3749 : {
3750 2959 : const auto nStartOffset = panOffsets[i + iStart];
3751 2959 : const auto nEndOffset = panOffsets[i + iStart + 1];
3752 2959 : if (nStartOffset != nEndOffset)
3753 : {
3754 3073 : if (nStartOffset >=
3755 1538 : panOffsets[iStart] + abyChildValidity.size())
3756 : {
3757 : // shouldn't happen in sane arrays...
3758 0 : CPLError(CE_Failure, CPLE_AppDefined,
3759 : "nStartOffset >= panOffsets[iStart] + "
3760 : "abyChildValidity.size()");
3761 0 : return false;
3762 : }
3763 : // nEndOffset might be equal to abyChildValidity.size()
3764 3073 : if (nEndOffset >
3765 1538 : panOffsets[iStart] + abyChildValidity.size())
3766 : {
3767 : // shouldn't happen in sane arrays...
3768 0 : CPLError(CE_Failure, CPLE_AppDefined,
3769 : "nEndOffset > panOffsets[iStart] + "
3770 : "abyChildValidity.size()");
3771 0 : return false;
3772 : }
3773 1538 : for (auto k = nStartOffset - panOffsets[iStart];
3774 4652 : k < nEndOffset - panOffsets[iStart]; ++k)
3775 3114 : abyChildValidity[static_cast<size_t>(k)] = false;
3776 : }
3777 : }
3778 : }
3779 1593 : panOffsets[j] = nCurOffset;
3780 1593 : const size_t nChildNewLength = static_cast<size_t>(panOffsets[j]);
3781 : // To please Arrow validation
3782 4552 : for (; j < iStart + nLength; ++j)
3783 2959 : panOffsets[j] = nCurOffset;
3784 :
3785 1593 : if (!CompactArray(psChildSchema, psChildArray,
3786 1593 : static_cast<size_t>(panOffsets[iStart]),
3787 : abyChildValidity, nChildNewLength))
3788 0 : return false;
3789 :
3790 1593 : CPLAssert(psChildArray->length ==
3791 : static_cast<int64_t>(nChildNewLength));
3792 : }
3793 : else
3794 : {
3795 180 : InvalidateNullCountRec(psChildSchema, psChildArray);
3796 : }
3797 :
3798 1773 : if (schema->flags & ARROW_FLAG_NULLABLE)
3799 1773 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3800 : nNewLength);
3801 :
3802 1773 : array->length = nNewLength;
3803 :
3804 1773 : return true;
3805 : }
3806 :
3807 : /************************************************************************/
3808 : /* CompactFixedSizeListArray() */
3809 : /************************************************************************/
3810 :
3811 : static bool
3812 809 : CompactFixedSizeListArray(const struct ArrowSchema *schema,
3813 : struct ArrowArray *array, size_t N, size_t iStart,
3814 : const std::vector<bool> &abyValidityFromFilters,
3815 : size_t nNewLength)
3816 : {
3817 809 : CPLAssert(static_cast<size_t>(array->length) >=
3818 : iStart + abyValidityFromFilters.size());
3819 809 : CPLAssert(array->n_children == 1);
3820 :
3821 809 : const auto psChildSchema = schema->children[0];
3822 809 : const auto psChildArray = array->children[0];
3823 :
3824 809 : const size_t nLength = abyValidityFromFilters.size();
3825 809 : const size_t nOffset = static_cast<size_t>(array->offset);
3826 1618 : std::vector<bool> abyChildValidity(N * nLength, true);
3827 809 : size_t nChildNewLength = (iStart + nOffset) * N;
3828 809 : size_t nSrcLength = 0;
3829 3198 : for (size_t i = 0; i < nLength; ++i)
3830 : {
3831 2389 : if (abyValidityFromFilters[i])
3832 : {
3833 1015 : nChildNewLength += N;
3834 1015 : nSrcLength++;
3835 : }
3836 : else
3837 : {
3838 1374 : const size_t nStartOffset = i * N;
3839 1374 : const size_t nEndOffset = (i + 1) * N;
3840 4122 : for (size_t k = nStartOffset; k < nEndOffset; ++k)
3841 2748 : abyChildValidity[k] = false;
3842 : }
3843 : }
3844 809 : CPL_IGNORE_RET_VAL(nSrcLength);
3845 809 : CPLAssert(iStart + nSrcLength == nNewLength);
3846 :
3847 809 : if (!CompactArray(psChildSchema, psChildArray, (iStart + nOffset) * N,
3848 : abyChildValidity, nChildNewLength))
3849 0 : return false;
3850 :
3851 809 : if (schema->flags & ARROW_FLAG_NULLABLE)
3852 809 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3853 : nNewLength);
3854 :
3855 809 : array->length = nNewLength;
3856 :
3857 809 : CPLAssert(psChildArray->length >=
3858 : static_cast<int64_t>(N) * (array->length + array->offset));
3859 :
3860 809 : return true;
3861 : }
3862 :
3863 : /************************************************************************/
3864 : /* CompactMapArray() */
3865 : /************************************************************************/
3866 :
3867 561 : static bool CompactMapArray(const struct ArrowSchema *schema,
3868 : struct ArrowArray *array, size_t iStart,
3869 : const std::vector<bool> &abyValidityFromFilters,
3870 : size_t nNewLength)
3871 : {
3872 561 : return CompactListArray<uint32_t>(schema, array, iStart,
3873 561 : abyValidityFromFilters, nNewLength);
3874 : }
3875 :
3876 : /************************************************************************/
3877 : /* CompactArray() */
3878 : /************************************************************************/
3879 :
3880 8455 : static bool CompactArray(const struct ArrowSchema *schema,
3881 : struct ArrowArray *array, size_t iStart,
3882 : const std::vector<bool> &abyValidityFromFilters,
3883 : size_t nNewLength)
3884 : {
3885 8455 : const char *format = schema->format;
3886 :
3887 8455 : if (IsStructure(format))
3888 : {
3889 582 : if (!CompactStructArray(schema, array, iStart, abyValidityFromFilters,
3890 : nNewLength))
3891 0 : return false;
3892 : }
3893 7873 : else if (IsList(format))
3894 : {
3895 1209 : if (!CompactListArray<uint32_t>(schema, array, iStart,
3896 : abyValidityFromFilters, nNewLength))
3897 0 : return false;
3898 : }
3899 6664 : else if (IsLargeList(format))
3900 : {
3901 3 : if (!CompactListArray<uint64_t>(schema, array, iStart,
3902 : abyValidityFromFilters, nNewLength))
3903 0 : return false;
3904 : }
3905 6661 : else if (IsMap(format))
3906 : {
3907 561 : if (!CompactMapArray(schema, array, iStart, abyValidityFromFilters,
3908 : nNewLength))
3909 0 : return false;
3910 : }
3911 6100 : else if (IsFixedSizeList(format))
3912 : {
3913 809 : const int N = GetFixedSizeList(format);
3914 809 : if (N <= 0)
3915 0 : return false;
3916 809 : if (!CompactFixedSizeListArray(schema, array, static_cast<size_t>(N),
3917 : iStart, abyValidityFromFilters,
3918 : nNewLength))
3919 0 : return false;
3920 : }
3921 5291 : else if (IsBoolean(format))
3922 : {
3923 224 : CompactBoolArray(schema, array, iStart, abyValidityFromFilters,
3924 : nNewLength);
3925 : }
3926 5067 : else if (IsInt8(format) || IsUInt8(format))
3927 : {
3928 444 : CompactPrimitiveArray<uint8_t>(schema, array, iStart,
3929 : abyValidityFromFilters, nNewLength);
3930 : }
3931 4623 : else if (IsInt16(format) || IsUInt16(format) || IsFloat16(format))
3932 : {
3933 458 : CompactPrimitiveArray<uint16_t>(schema, array, iStart,
3934 : abyValidityFromFilters, nNewLength);
3935 : }
3936 8037 : else if (IsInt32(format) || IsUInt32(format) || IsFloat32(format) ||
3937 11539 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
3938 3502 : strcmp(format, "ttm") == 0)
3939 : {
3940 794 : CompactPrimitiveArray<uint32_t>(schema, array, iStart,
3941 : abyValidityFromFilters, nNewLength);
3942 : }
3943 6023 : else if (IsInt64(format) || IsUInt64(format) || IsFloat64(format) ||
3944 1997 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
3945 6023 : strcmp(format, "ttn") == 0 || strncmp(format, "ts", 2) == 0)
3946 : {
3947 1879 : CompactPrimitiveArray<uint64_t>(schema, array, iStart,
3948 : abyValidityFromFilters, nNewLength);
3949 : }
3950 1492 : else if (IsString(format) || IsBinary(format))
3951 : {
3952 983 : CompactStringOrBinaryArray<uint32_t>(
3953 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3954 : }
3955 509 : else if (IsLargeString(format) || IsLargeBinary(format))
3956 : {
3957 204 : CompactStringOrBinaryArray<uint64_t>(
3958 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3959 : }
3960 305 : else if (IsStringView(format))
3961 : {
3962 0 : CompactStringViewArray(schema, array, iStart, abyValidityFromFilters,
3963 : nNewLength);
3964 : }
3965 305 : else if (IsFixedWidthBinary(format))
3966 : {
3967 67 : const int nWidth = GetFixedWithBinary(format);
3968 67 : CompactFixedWidthArray(schema, array, nWidth, iStart,
3969 : abyValidityFromFilters, nNewLength);
3970 : }
3971 238 : else if (IsDecimal(format))
3972 : {
3973 238 : int nPrecision = 0;
3974 238 : int nScale = 0;
3975 238 : int nWidthInBytes = 0;
3976 238 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
3977 : {
3978 0 : CPLError(CE_Failure, CPLE_AppDefined,
3979 : "Unexpected error in PostFilterArrowArray(): unhandled "
3980 : "field format: %s",
3981 : format);
3982 :
3983 0 : return false;
3984 : }
3985 238 : CompactFixedWidthArray(schema, array, nWidthInBytes, iStart,
3986 : abyValidityFromFilters, nNewLength);
3987 : }
3988 : else
3989 : {
3990 0 : CPLError(CE_Failure, CPLE_AppDefined,
3991 : "Unexpected error in CompactArray(): unhandled "
3992 : "field format: %s",
3993 : format);
3994 0 : return false;
3995 : }
3996 :
3997 8455 : return true;
3998 : }
3999 :
4000 : /************************************************************************/
4001 : /* FillValidityArrayFromWKBArray() */
4002 : /************************************************************************/
4003 :
4004 : template <class OffsetType>
4005 : static size_t
4006 21 : FillValidityArrayFromWKBArray(struct ArrowArray *array, const OGRLayer *poLayer,
4007 : std::vector<bool> &abyValidityFromFilters)
4008 : {
4009 21 : const size_t nLength = static_cast<size_t>(array->length);
4010 14 : const uint8_t *pabyValidity =
4011 21 : array->null_count == 0
4012 : ? nullptr
4013 7 : : static_cast<const uint8_t *>(array->buffers[0]);
4014 21 : const size_t nOffset = static_cast<size_t>(array->offset);
4015 21 : const OffsetType *panOffsets =
4016 21 : static_cast<const OffsetType *>(array->buffers[1]) + nOffset;
4017 21 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
4018 21 : OGREnvelope sEnvelope;
4019 21 : abyValidityFromFilters.resize(nLength);
4020 21 : size_t nCountIntersecting = 0;
4021 138 : for (size_t i = 0; i < nLength; ++i)
4022 : {
4023 117 : if (!pabyValidity || TestBit(pabyValidity, i + nOffset))
4024 : {
4025 110 : const GByte *pabyWKB = pabyData + panOffsets[i];
4026 110 : const size_t nWKBSize =
4027 110 : static_cast<size_t>(panOffsets[i + 1] - panOffsets[i]);
4028 110 : if (poLayer->FilterWKBGeometry(pabyWKB, nWKBSize,
4029 : /* bEnvelopeAlreadySet=*/false,
4030 : sEnvelope))
4031 : {
4032 29 : abyValidityFromFilters[i] = true;
4033 29 : nCountIntersecting++;
4034 : }
4035 : }
4036 : }
4037 21 : return nCountIntersecting;
4038 : }
4039 :
4040 : /************************************************************************/
4041 : /* ArrowTimestampToOGRDateTime() */
4042 : /************************************************************************/
4043 :
4044 107 : static void ArrowTimestampToOGRDateTime(int64_t nTimestamp,
4045 : int nInvFactorToSecond,
4046 : const char *pszTZ, OGRFeature &oFeature,
4047 : int iField)
4048 : {
4049 107 : double floatingPart = 0;
4050 107 : if (nInvFactorToSecond)
4051 : {
4052 107 : floatingPart =
4053 107 : (nTimestamp % nInvFactorToSecond) / double(nInvFactorToSecond);
4054 107 : nTimestamp /= nInvFactorToSecond;
4055 : }
4056 107 : int nTZFlag = 0;
4057 107 : const size_t nTZLen = strlen(pszTZ);
4058 107 : if ((nTZLen == 3 && strcmp(pszTZ, "UTC") == 0) ||
4059 0 : (nTZLen == 7 && strcmp(pszTZ, "Etc/UTC") == 0))
4060 : {
4061 17 : nTZFlag = 100;
4062 : }
4063 90 : else if (nTZLen == 6 && (pszTZ[0] == '+' || pszTZ[0] == '-') &&
4064 33 : pszTZ[3] == ':')
4065 : {
4066 33 : int nTZHour = atoi(pszTZ + 1);
4067 33 : int nTZMin = atoi(pszTZ + 4);
4068 33 : if (nTZHour >= 0 && nTZHour <= 14 && nTZMin >= 0 && nTZMin < 60 &&
4069 33 : (nTZMin % 15) == 0)
4070 : {
4071 33 : nTZFlag = (nTZHour * 4) + (nTZMin / 15);
4072 33 : if (pszTZ[0] == '+')
4073 : {
4074 24 : nTZFlag = 100 + nTZFlag;
4075 24 : nTimestamp += nTZHour * 3600 + nTZMin * 60;
4076 : }
4077 : else
4078 : {
4079 9 : nTZFlag = 100 - nTZFlag;
4080 9 : nTimestamp -= nTZHour * 3600 + nTZMin * 60;
4081 : }
4082 : }
4083 : }
4084 : struct tm dt;
4085 107 : CPLUnixTimeToYMDHMS(nTimestamp, &dt);
4086 107 : oFeature.SetField(iField, dt.tm_year + 1900, dt.tm_mon + 1, dt.tm_mday,
4087 : dt.tm_hour, dt.tm_min,
4088 107 : static_cast<float>(dt.tm_sec + floatingPart), nTZFlag);
4089 107 : }
4090 :
4091 : /************************************************************************/
4092 : /* BuildMapFieldNameToArrowPath() */
4093 : /************************************************************************/
4094 :
4095 : static void
4096 334 : BuildMapFieldNameToArrowPath(const struct ArrowSchema *schema,
4097 : std::map<std::string, std::vector<int>> &oMap,
4098 : const std::string &osPrefix,
4099 : std::vector<int> &anArrowPath)
4100 : {
4101 7833 : for (int64_t i = 0; i < schema->n_children; ++i)
4102 : {
4103 7499 : auto psChild = schema->children[i];
4104 7499 : anArrowPath.push_back(static_cast<int>(i));
4105 7499 : if (IsStructure(psChild->format))
4106 : {
4107 400 : std::string osNewPrefix(osPrefix);
4108 200 : osNewPrefix += psChild->name;
4109 200 : osNewPrefix += ".";
4110 200 : BuildMapFieldNameToArrowPath(psChild, oMap, osNewPrefix,
4111 : anArrowPath);
4112 : }
4113 : else
4114 : {
4115 7299 : oMap[osPrefix + psChild->name] = anArrowPath;
4116 : }
4117 7499 : anArrowPath.pop_back();
4118 : }
4119 334 : }
4120 :
4121 : /************************************************************************/
4122 : /* FillFieldList() */
4123 : /************************************************************************/
4124 :
4125 : template <typename ListOffsetType, typename ArrowType,
4126 : typename OGRType = ArrowType>
4127 167 : inline static void FillFieldList(const struct ArrowArray *array,
4128 : int iOGRFieldIdx, size_t nOffsettedIndex,
4129 : const struct ArrowArray *childArray,
4130 : OGRFeature &oFeature)
4131 : {
4132 167 : const auto panOffsets =
4133 167 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4134 : nOffsettedIndex;
4135 334 : std::vector<OGRType> aValues;
4136 167 : const auto *paValues =
4137 167 : static_cast<const ArrowType *>(childArray->buffers[1]);
4138 167 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4139 509 : i < static_cast<size_t>(panOffsets[1]); ++i)
4140 : {
4141 342 : aValues.push_back(static_cast<OGRType>(paValues[i]));
4142 : }
4143 167 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4144 : aValues.data());
4145 167 : }
4146 :
4147 : /************************************************************************/
4148 : /* FillFieldListFromBool() */
4149 : /************************************************************************/
4150 :
4151 : template <typename ListOffsetType>
4152 : inline static void
4153 16 : FillFieldListFromBool(const struct ArrowArray *array, int iOGRFieldIdx,
4154 : size_t nOffsettedIndex,
4155 : const struct ArrowArray *childArray, OGRFeature &oFeature)
4156 : {
4157 16 : const auto panOffsets =
4158 16 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4159 : nOffsettedIndex;
4160 32 : std::vector<int> aValues;
4161 16 : const auto *paValues = static_cast<const uint8_t *>(childArray->buffers[1]);
4162 16 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4163 48 : i < static_cast<size_t>(panOffsets[1]); ++i)
4164 : {
4165 32 : aValues.push_back(TestBit(paValues, i) ? 1 : 0);
4166 : }
4167 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4168 16 : aValues.data());
4169 16 : }
4170 :
4171 : /************************************************************************/
4172 : /* FillFieldListFromHalfFloat() */
4173 : /************************************************************************/
4174 :
4175 : template <typename ListOffsetType>
4176 8 : inline static void FillFieldListFromHalfFloat(
4177 : const struct ArrowArray *array, int iOGRFieldIdx, size_t nOffsettedIndex,
4178 : const struct ArrowArray *childArray, OGRFeature &oFeature)
4179 : {
4180 8 : const auto panOffsets =
4181 8 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4182 : nOffsettedIndex;
4183 16 : std::vector<double> aValues;
4184 8 : const auto *phfValues =
4185 8 : static_cast<const GFloat16 *>(childArray->buffers[1]);
4186 8 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4187 24 : i < static_cast<size_t>(panOffsets[1]); ++i)
4188 : {
4189 16 : aValues.push_back(static_cast<double>(phfValues[i]));
4190 : }
4191 8 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4192 8 : aValues.data());
4193 8 : }
4194 :
4195 : /************************************************************************/
4196 : /* FillFieldListFromString() */
4197 : /************************************************************************/
4198 :
4199 : template <typename ListOffsetType, typename StringOffsetType>
4200 32 : inline static void FillFieldListFromString(const struct ArrowArray *array,
4201 : int iOGRFieldIdx,
4202 : size_t nOffsettedIndex,
4203 : const struct ArrowArray *childArray,
4204 : OGRFeature &oFeature)
4205 : {
4206 32 : const auto panOffsets =
4207 32 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4208 : nOffsettedIndex;
4209 64 : CPLStringList aosVals;
4210 90 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4211 90 : i < static_cast<size_t>(panOffsets[1]); ++i)
4212 : {
4213 58 : aosVals.push_back(
4214 : GetStringAsStringView<StringOffsetType>(childArray, i));
4215 : }
4216 32 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
4217 32 : }
4218 :
4219 : /************************************************************************/
4220 : /* FillFieldListFromStringView() */
4221 : /************************************************************************/
4222 :
4223 : template <typename ListOffsetType>
4224 0 : inline static void FillFieldListFromStringView(
4225 : const struct ArrowArray *array, int iOGRFieldIdx, size_t nOffsettedIndex,
4226 : const struct ArrowArray *childArray, OGRFeature &oFeature)
4227 : {
4228 0 : const auto panOffsets =
4229 0 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4230 : nOffsettedIndex;
4231 0 : CPLStringList aosVals;
4232 0 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4233 0 : i < static_cast<size_t>(panOffsets[1]); ++i)
4234 : {
4235 0 : aosVals.push_back(GetStringView(childArray, i));
4236 : }
4237 0 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
4238 0 : }
4239 :
4240 : /************************************************************************/
4241 : /* FillFieldFixedSizeList() */
4242 : /************************************************************************/
4243 :
4244 : template <typename ArrowType, typename OGRType = ArrowType>
4245 120 : inline static void FillFieldFixedSizeList(
4246 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
4247 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
4248 : {
4249 240 : std::vector<OGRType> aValues;
4250 120 : const auto *paValues =
4251 120 : static_cast<const ArrowType *>(childArray->buffers[1]) +
4252 120 : childArray->offset + nOffsettedIndex * nItems;
4253 360 : for (int i = 0; i < nItems; ++i)
4254 : {
4255 240 : aValues.push_back(static_cast<OGRType>(paValues[i]));
4256 : }
4257 120 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4258 : aValues.data());
4259 120 : }
4260 :
4261 : /************************************************************************/
4262 : /* FillFieldFixedSizeListString() */
4263 : /************************************************************************/
4264 :
4265 : template <typename StringOffsetType>
4266 17 : inline static void FillFieldFixedSizeListString(
4267 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
4268 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
4269 : {
4270 34 : CPLStringList aosVals;
4271 51 : for (int i = 0; i < nItems; ++i)
4272 : {
4273 34 : aosVals.push_back(GetStringAsStringView<StringOffsetType>(
4274 34 : childArray, nOffsettedIndex * nItems + i));
4275 : }
4276 17 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
4277 17 : }
4278 :
4279 : /************************************************************************/
4280 : /* FillFieldFixedSizeListStringView() */
4281 : /************************************************************************/
4282 :
4283 0 : inline static void FillFieldFixedSizeListStringView(
4284 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
4285 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
4286 : {
4287 0 : CPLStringList aosVals;
4288 0 : for (int i = 0; i < nItems; ++i)
4289 : {
4290 0 : aosVals.push_back(
4291 0 : GetStringView(childArray, nOffsettedIndex * nItems + i));
4292 : }
4293 0 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
4294 0 : }
4295 :
4296 : /************************************************************************/
4297 : /* GetValue() */
4298 : /************************************************************************/
4299 :
4300 : template <typename ArrowType>
4301 245 : inline static ArrowType GetValue(const struct ArrowArray *array,
4302 : size_t iFeature)
4303 : {
4304 245 : const auto *panValues = static_cast<const ArrowType *>(array->buffers[1]);
4305 245 : return panValues[iFeature + array->offset];
4306 : }
4307 :
4308 12 : template <> bool GetValue<bool>(const struct ArrowArray *array, size_t iFeature)
4309 : {
4310 12 : const auto *pabyValues = static_cast<const uint8_t *>(array->buffers[1]);
4311 12 : return TestBit(pabyValues, iFeature + static_cast<size_t>(array->offset));
4312 : }
4313 :
4314 : /************************************************************************/
4315 : /* GetValueFloat16() */
4316 : /************************************************************************/
4317 :
4318 23 : static float GetValueFloat16(const struct ArrowArray *array, const size_t nIdx)
4319 : {
4320 23 : const auto *panValues = static_cast<const uint16_t *>(array->buffers[1]);
4321 : const auto nFloat16AsUInt32 =
4322 23 : CPLHalfToFloat(panValues[nIdx + array->offset]);
4323 : float f;
4324 23 : memcpy(&f, &nFloat16AsUInt32, sizeof(f));
4325 23 : return f;
4326 : }
4327 :
4328 : /************************************************************************/
4329 : /* GetValueDecimal() */
4330 : /************************************************************************/
4331 :
4332 71 : static double GetValueDecimal(const struct ArrowArray *array,
4333 : const int nWidthIn64BitWord, const int nScale,
4334 : const size_t nIdx)
4335 : {
4336 : #ifdef CPL_LSB
4337 71 : const auto nIdxIn64BitWord = nIdx * nWidthIn64BitWord;
4338 : #else
4339 : const auto nIdxIn64BitWord =
4340 : nIdx * nWidthIn64BitWord + nWidthIn64BitWord - 1;
4341 : #endif
4342 71 : const auto *panValues = static_cast<const int64_t *>(array->buffers[1]);
4343 71 : const auto nVal =
4344 71 : panValues[nIdxIn64BitWord + array->offset * nWidthIn64BitWord];
4345 71 : return static_cast<double>(nVal) * std::pow(10.0, -nScale);
4346 : }
4347 :
4348 : /************************************************************************/
4349 : /* GetBinaryAsBase64() */
4350 : /************************************************************************/
4351 :
4352 : template <class OffsetType>
4353 8 : static std::string GetBinaryAsBase64(const struct ArrowArray *array,
4354 : const size_t nIdx)
4355 : {
4356 8 : const OffsetType *panOffsets =
4357 8 : static_cast<const OffsetType *>(array->buffers[1]) +
4358 8 : static_cast<size_t>(array->offset) + nIdx;
4359 8 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
4360 8 : const size_t nLen = static_cast<size_t>(panOffsets[1] - panOffsets[0]);
4361 8 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
4362 : {
4363 0 : CPLError(CE_Failure, CPLE_AppDefined, "Too large binary");
4364 0 : return std::string();
4365 : }
4366 16 : char *pszVal = CPLBase64Encode(
4367 8 : static_cast<int>(nLen), pabyData + static_cast<size_t>(panOffsets[0]));
4368 16 : std::string osStr(pszVal);
4369 8 : CPLFree(pszVal);
4370 8 : return osStr;
4371 : }
4372 :
4373 : /************************************************************************/
4374 : /* GetValueFixedWithBinaryAsBase64() */
4375 : /************************************************************************/
4376 :
4377 : static std::string
4378 4 : GetValueFixedWithBinaryAsBase64(const struct ArrowArray *array,
4379 : const int nWidth, const size_t nIdx)
4380 : {
4381 4 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[1]);
4382 8 : char *pszVal = CPLBase64Encode(
4383 : nWidth,
4384 4 : pabyData + (static_cast<size_t>(array->offset) + nIdx) * nWidth);
4385 4 : std::string osStr(pszVal);
4386 4 : CPLFree(pszVal);
4387 4 : return osStr;
4388 : }
4389 :
4390 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4391 : const struct ArrowArray *array,
4392 : const size_t nIdx);
4393 :
4394 : /************************************************************************/
4395 : /* AddToArray() */
4396 : /************************************************************************/
4397 :
4398 142 : static void AddToArray(CPLJSONArray &oArray, const struct ArrowSchema *schema,
4399 : const struct ArrowArray *array, const size_t nIdx)
4400 : {
4401 142 : if (IsBoolean(schema->format))
4402 7 : oArray.Add(GetValue<bool>(array, nIdx));
4403 135 : else if (IsUInt8(schema->format))
4404 13 : oArray.Add(GetValue<uint8_t>(array, nIdx));
4405 122 : else if (IsInt8(schema->format))
4406 7 : oArray.Add(GetValue<int8_t>(array, nIdx));
4407 115 : else if (IsUInt16(schema->format))
4408 7 : oArray.Add(GetValue<uint16_t>(array, nIdx));
4409 108 : else if (IsInt16(schema->format))
4410 7 : oArray.Add(GetValue<int16_t>(array, nIdx));
4411 101 : else if (IsUInt32(schema->format))
4412 7 : oArray.Add(static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
4413 94 : else if (IsInt32(schema->format))
4414 7 : oArray.Add(GetValue<int32_t>(array, nIdx));
4415 87 : else if (IsUInt64(schema->format))
4416 7 : oArray.Add(GetValue<uint64_t>(array, nIdx));
4417 80 : else if (IsInt64(schema->format))
4418 7 : oArray.Add(static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
4419 73 : else if (IsFloat16(schema->format))
4420 7 : oArray.Add(static_cast<double>(GetValueFloat16(array, nIdx)));
4421 66 : else if (IsFloat32(schema->format))
4422 7 : oArray.Add(static_cast<double>(GetValue<float>(array, nIdx)));
4423 59 : else if (IsFloat64(schema->format))
4424 7 : oArray.Add(GetValue<double>(array, nIdx));
4425 52 : else if (IsString(schema->format))
4426 13 : oArray.Add(GetStringAsStringView<uint32_t>(array, nIdx));
4427 39 : else if (IsLargeString(schema->format))
4428 4 : oArray.Add(GetStringAsStringView<uint64_t>(array, nIdx));
4429 35 : else if (IsStringView(schema->format))
4430 0 : oArray.Add(GetStringView(array, nIdx));
4431 35 : else if (IsBinary(schema->format))
4432 2 : oArray.Add(GetBinaryAsBase64<uint32_t>(array, nIdx));
4433 33 : else if (IsLargeBinary(schema->format))
4434 2 : oArray.Add(GetBinaryAsBase64<uint64_t>(array, nIdx));
4435 31 : else if (IsFixedWidthBinary(schema->format))
4436 2 : oArray.Add(GetValueFixedWithBinaryAsBase64(
4437 2 : array, GetFixedWithBinary(schema->format), nIdx));
4438 29 : else if (IsDecimal(schema->format))
4439 : {
4440 7 : int nPrecision = 0;
4441 7 : int nScale = 0;
4442 7 : int nWidthInBytes = 0;
4443 7 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
4444 7 : nWidthInBytes);
4445 : // Already validated
4446 7 : CPLAssert(bOK);
4447 7 : CPL_IGNORE_RET_VAL(bOK);
4448 7 : oArray.Add(GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
4449 : }
4450 : else
4451 22 : oArray.Add(GetObjectAsJSON(schema, array, nIdx));
4452 142 : }
4453 :
4454 : /************************************************************************/
4455 : /* GetListAsJSON() */
4456 : /************************************************************************/
4457 :
4458 : template <class OffsetType>
4459 112 : static CPLJSONArray GetListAsJSON(const struct ArrowSchema *schema,
4460 : const struct ArrowArray *array,
4461 : const size_t nIdx)
4462 : {
4463 112 : CPLJSONArray oArray;
4464 112 : const auto panOffsets = static_cast<const OffsetType *>(array->buffers[1]) +
4465 112 : array->offset + nIdx;
4466 112 : const auto childSchema = schema->children[0];
4467 112 : const auto childArray = array->children[0];
4468 5 : const uint8_t *pabyValidity =
4469 112 : childArray->null_count == 0
4470 : ? nullptr
4471 107 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4472 278 : for (size_t k = static_cast<size_t>(panOffsets[0]);
4473 278 : k < static_cast<size_t>(panOffsets[1]); k++)
4474 : {
4475 318 : if (!pabyValidity ||
4476 152 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4477 : {
4478 136 : AddToArray(oArray, childSchema, childArray, k);
4479 : }
4480 : else
4481 : {
4482 30 : oArray.AddNull();
4483 : }
4484 : }
4485 112 : return oArray;
4486 : }
4487 :
4488 : /************************************************************************/
4489 : /* GetFixedSizeListAsJSON() */
4490 : /************************************************************************/
4491 :
4492 3 : static CPLJSONArray GetFixedSizeListAsJSON(const struct ArrowSchema *schema,
4493 : const struct ArrowArray *array,
4494 : const size_t nIdx)
4495 : {
4496 3 : CPLJSONArray oArray;
4497 3 : const int nVals = GetFixedSizeList(schema->format);
4498 3 : const auto childSchema = schema->children[0];
4499 3 : const auto childArray = array->children[0];
4500 3 : const uint8_t *pabyValidity =
4501 3 : childArray->null_count == 0
4502 3 : ? nullptr
4503 3 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4504 9 : for (size_t k = nIdx * nVals; k < (nIdx + 1) * nVals; k++)
4505 : {
4506 12 : if (!pabyValidity ||
4507 6 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4508 : {
4509 6 : AddToArray(oArray, childSchema, childArray, k);
4510 : }
4511 : else
4512 : {
4513 0 : oArray.AddNull();
4514 : }
4515 : }
4516 3 : return oArray;
4517 : }
4518 :
4519 : /************************************************************************/
4520 : /* AddToDict() */
4521 : /************************************************************************/
4522 :
4523 198 : static void AddToDict(CPLJSONObject &oDict, const std::string &osKey,
4524 : const struct ArrowSchema *schema,
4525 : const struct ArrowArray *array, const size_t nIdx)
4526 : {
4527 198 : if (IsBoolean(schema->format))
4528 5 : oDict.Add(osKey, GetValue<bool>(array, nIdx));
4529 193 : else if (IsUInt8(schema->format))
4530 5 : oDict.Add(osKey, GetValue<uint8_t>(array, nIdx));
4531 188 : else if (IsInt8(schema->format))
4532 5 : oDict.Add(osKey, GetValue<int8_t>(array, nIdx));
4533 183 : else if (IsUInt16(schema->format))
4534 5 : oDict.Add(osKey, GetValue<uint16_t>(array, nIdx));
4535 178 : else if (IsInt16(schema->format))
4536 5 : oDict.Add(osKey, GetValue<int16_t>(array, nIdx));
4537 173 : else if (IsUInt32(schema->format))
4538 2 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
4539 171 : else if (IsInt32(schema->format))
4540 6 : oDict.Add(osKey, GetValue<int32_t>(array, nIdx));
4541 165 : else if (IsUInt64(schema->format))
4542 5 : oDict.Add(osKey, GetValue<uint64_t>(array, nIdx));
4543 160 : else if (IsInt64(schema->format))
4544 22 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
4545 138 : else if (IsFloat16(schema->format))
4546 2 : oDict.Add(osKey, static_cast<double>(GetValueFloat16(array, nIdx)));
4547 136 : else if (IsFloat32(schema->format))
4548 5 : oDict.Add(osKey, static_cast<double>(GetValue<float>(array, nIdx)));
4549 131 : else if (IsFloat64(schema->format))
4550 19 : oDict.Add(osKey, GetValue<double>(array, nIdx));
4551 112 : else if (IsString(schema->format))
4552 14 : oDict.Add(osKey, GetStringAsStringView<uint32_t>(array, nIdx));
4553 98 : else if (IsLargeString(schema->format))
4554 2 : oDict.Add(osKey, GetStringAsStringView<uint64_t>(array, nIdx));
4555 96 : else if (IsStringView(schema->format))
4556 0 : oDict.Add(osKey, GetStringView(array, nIdx));
4557 96 : else if (IsBinary(schema->format))
4558 2 : oDict.Add(osKey, GetBinaryAsBase64<uint32_t>(array, nIdx));
4559 94 : else if (IsLargeBinary(schema->format))
4560 2 : oDict.Add(osKey, GetBinaryAsBase64<uint64_t>(array, nIdx));
4561 92 : else if (IsFixedWidthBinary(schema->format))
4562 2 : oDict.Add(osKey, GetValueFixedWithBinaryAsBase64(
4563 2 : array, GetFixedWithBinary(schema->format), nIdx));
4564 90 : else if (IsDecimal(schema->format))
4565 : {
4566 8 : int nPrecision = 0;
4567 8 : int nScale = 0;
4568 8 : int nWidthInBytes = 0;
4569 8 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
4570 8 : nWidthInBytes);
4571 : // Already validated
4572 8 : CPLAssert(bOK);
4573 8 : CPL_IGNORE_RET_VAL(bOK);
4574 8 : oDict.Add(osKey,
4575 : GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
4576 : }
4577 : else
4578 82 : oDict.Add(osKey, GetObjectAsJSON(schema, array, nIdx));
4579 198 : }
4580 :
4581 : /************************************************************************/
4582 : /* GetMapAsJSON() */
4583 : /************************************************************************/
4584 :
4585 243 : static CPLJSONObject GetMapAsJSON(const struct ArrowSchema *schema,
4586 : const struct ArrowArray *array,
4587 : const size_t nIdx)
4588 : {
4589 243 : const auto schemaStruct = schema->children[0];
4590 243 : if (!IsStructure(schemaStruct->format))
4591 : {
4592 0 : CPLError(CE_Failure, CPLE_AppDefined,
4593 : "GetMapAsJSON(): !IsStructure(schemaStruct->format))");
4594 0 : return CPLJSONObject();
4595 : }
4596 243 : const auto schemaKey = schemaStruct->children[0];
4597 243 : const auto schemaValues = schemaStruct->children[1];
4598 243 : if (!IsString(schemaKey->format))
4599 : {
4600 0 : CPLError(CE_Failure, CPLE_AppDefined,
4601 : "GetMapAsJSON(): !IsString(schemaKey->format))");
4602 0 : return CPLJSONObject();
4603 : }
4604 243 : const auto arrayKeys = array->children[0]->children[0];
4605 243 : const auto arrayValues = array->children[0]->children[1];
4606 :
4607 486 : CPLJSONObject oDict;
4608 243 : const auto panOffsets =
4609 243 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset + nIdx;
4610 243 : const uint8_t *pabyValidityKeys =
4611 243 : arrayKeys->null_count == 0
4612 243 : ? nullptr
4613 0 : : static_cast<const uint8_t *>(arrayKeys->buffers[0]);
4614 243 : const uint32_t *panOffsetsKeys =
4615 243 : static_cast<const uint32_t *>(arrayKeys->buffers[1]) +
4616 243 : arrayKeys->offset;
4617 243 : const char *pabyKeys = static_cast<const char *>(arrayKeys->buffers[2]);
4618 243 : const uint8_t *pabyValidityValues =
4619 243 : arrayValues->null_count == 0
4620 243 : ? nullptr
4621 237 : : static_cast<const uint8_t *>(arrayValues->buffers[0]);
4622 463 : for (uint32_t k = panOffsets[0]; k < panOffsets[1]; k++)
4623 : {
4624 220 : if (!pabyValidityKeys ||
4625 0 : TestBit(pabyValidityKeys,
4626 0 : k + static_cast<size_t>(arrayKeys->offset)))
4627 : {
4628 440 : std::string osKey;
4629 220 : osKey.assign(pabyKeys + panOffsetsKeys[k],
4630 220 : panOffsetsKeys[k + 1] - panOffsetsKeys[k]);
4631 :
4632 433 : if (!pabyValidityValues ||
4633 213 : TestBit(pabyValidityValues,
4634 213 : k + static_cast<size_t>(arrayValues->offset)))
4635 : {
4636 168 : AddToDict(oDict, osKey, schemaValues, arrayValues, k);
4637 : }
4638 : else
4639 : {
4640 52 : oDict.AddNull(osKey);
4641 : }
4642 : }
4643 : }
4644 243 : return oDict;
4645 : }
4646 :
4647 : /************************************************************************/
4648 : /* GetStructureAsJSON() */
4649 : /************************************************************************/
4650 :
4651 16 : static CPLJSONObject GetStructureAsJSON(const struct ArrowSchema *schema,
4652 : const struct ArrowArray *array,
4653 : const size_t nIdx)
4654 : {
4655 16 : CPLJSONObject oDict;
4656 62 : for (int64_t k = 0; k < array->n_children; k++)
4657 : {
4658 46 : const uint8_t *pabyValidityValues =
4659 46 : array->children[k]->null_count == 0
4660 46 : ? nullptr
4661 36 : : static_cast<const uint8_t *>(array->children[k]->buffers[0]);
4662 82 : if (!pabyValidityValues ||
4663 36 : TestBit(pabyValidityValues,
4664 36 : nIdx + static_cast<size_t>(array->children[k]->offset)))
4665 : {
4666 30 : AddToDict(oDict, schema->children[k]->name, schema->children[k],
4667 30 : array->children[k], nIdx);
4668 : }
4669 : else
4670 : {
4671 16 : oDict.AddNull(schema->children[k]->name);
4672 : }
4673 : }
4674 16 : return oDict;
4675 : }
4676 :
4677 : /************************************************************************/
4678 : /* GetObjectAsJSON() */
4679 : /************************************************************************/
4680 :
4681 104 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4682 : const struct ArrowArray *array,
4683 : const size_t nIdx)
4684 : {
4685 104 : if (IsMap(schema->format))
4686 4 : return GetMapAsJSON(schema, array, nIdx);
4687 100 : else if (IsList(schema->format))
4688 156 : return GetListAsJSON<uint32_t>(schema, array, nIdx);
4689 22 : else if (IsLargeList(schema->format))
4690 6 : return GetListAsJSON<uint64_t>(schema, array, nIdx);
4691 19 : else if (IsFixedSizeList(schema->format))
4692 6 : return GetFixedSizeListAsJSON(schema, array, nIdx);
4693 16 : else if (IsStructure(schema->format))
4694 16 : return GetStructureAsJSON(schema, array, nIdx);
4695 : else
4696 : {
4697 0 : CPLError(CE_Failure, CPLE_AppDefined,
4698 : "GetObjectAsJSON(): unhandled value format: %s",
4699 0 : schema->format);
4700 0 : return CPLJSONObject();
4701 : }
4702 : }
4703 :
4704 : /************************************************************************/
4705 : /* SetFieldForOtherFormats() */
4706 : /************************************************************************/
4707 :
4708 856 : static bool SetFieldForOtherFormats(OGRFeature &oFeature,
4709 : const int iOGRFieldIndex,
4710 : const size_t nOffsettedIndex,
4711 : const struct ArrowSchema *schema,
4712 : const struct ArrowArray *array)
4713 : {
4714 856 : const char *format = schema->format;
4715 856 : if (IsFloat16(format))
4716 : {
4717 4 : oFeature.SetField(
4718 : iOGRFieldIndex,
4719 4 : static_cast<double>(GetValueFloat16(
4720 4 : array, nOffsettedIndex - static_cast<size_t>(array->offset))));
4721 : }
4722 :
4723 852 : else if (IsFixedWidthBinary(format))
4724 : {
4725 : // Fixed width binary
4726 17 : const int nWidth = GetFixedWithBinary(format);
4727 17 : oFeature.SetField(iOGRFieldIndex, nWidth,
4728 17 : static_cast<const GByte *>(array->buffers[1]) +
4729 17 : nOffsettedIndex * nWidth);
4730 : }
4731 835 : else if (format[0] == 't' && format[1] == 'd' &&
4732 38 : format[2] == 'D') // strcmp(format, "tdD") == 0
4733 : {
4734 : // date32[days]
4735 : // number of days since Epoch
4736 33 : int64_t timestamp = static_cast<int64_t>(static_cast<const int32_t *>(
4737 33 : array->buffers[1])[nOffsettedIndex]) *
4738 : 3600 * 24;
4739 : struct tm dt;
4740 33 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4741 33 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4742 : dt.tm_mday, 0, 0, 0);
4743 33 : return true;
4744 : }
4745 802 : else if (format[0] == 't' && format[1] == 'd' &&
4746 5 : format[2] == 'm') // strcmp(format, "tdm") == 0
4747 : {
4748 : // date64[milliseconds]
4749 : // number of milliseconds since Epoch
4750 5 : int64_t timestamp =
4751 5 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex] /
4752 : 1000;
4753 : struct tm dt;
4754 5 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4755 5 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4756 5 : dt.tm_mday, 0, 0, 0);
4757 : }
4758 797 : else if (format[0] == 't' && format[1] == 't' &&
4759 39 : format[2] == 's') // strcmp(format, "tts") == 0
4760 : {
4761 : // time32 [seconds]
4762 0 : int32_t value =
4763 0 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4764 0 : const int nHour = value / 3600;
4765 0 : const int nMinute = (value / 60) % 60;
4766 0 : const int nSecond = value % 60;
4767 0 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4768 0 : static_cast<float>(nSecond));
4769 : }
4770 797 : else if (format[0] == 't' && format[1] == 't' &&
4771 39 : format[2] == 'm') // strcmp(format, "ttm") == 0
4772 : {
4773 : // time32 [milliseconds]
4774 25 : int32_t value =
4775 25 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4776 25 : double floatingPart = (value % 1000) / 1e3;
4777 25 : value /= 1000;
4778 25 : const int nHour = value / 3600;
4779 25 : const int nMinute = (value / 60) % 60;
4780 25 : const int nSecond = value % 60;
4781 25 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4782 25 : static_cast<float>(nSecond + floatingPart));
4783 : }
4784 772 : else if (format[0] == 't' && format[1] == 't' &&
4785 14 : (format[2] == 'u' || // time64 [microseconds]
4786 7 : format[2] == 'n')) // time64 [nanoseconds]
4787 : {
4788 14 : int64_t value =
4789 14 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex];
4790 14 : if (oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() == OFTInteger64)
4791 : {
4792 2 : oFeature.SetField(iOGRFieldIndex, static_cast<GIntBig>(value));
4793 : }
4794 : else
4795 : {
4796 : double floatingPart;
4797 12 : if (format[2] == 'u')
4798 : {
4799 5 : floatingPart = (value % (1000 * 1000)) / 1e6;
4800 5 : value /= 1000 * 1000;
4801 : }
4802 : else
4803 : {
4804 7 : floatingPart = (value % (1000 * 1000 * 1000)) / 1e9;
4805 7 : value /= 1000 * 1000 * 1000;
4806 : }
4807 12 : const int nHour = static_cast<int>(value / 3600);
4808 12 : const int nMinute = static_cast<int>((value / 60) % 60);
4809 12 : const int nSecond = static_cast<int>(value % 60);
4810 12 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4811 12 : static_cast<float>(nSecond + floatingPart));
4812 14 : }
4813 : }
4814 758 : else if (IsTimestampSeconds(format))
4815 : {
4816 0 : ArrowTimestampToOGRDateTime(
4817 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex], 1,
4818 : GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4819 : }
4820 758 : else if (IsTimestampMilliseconds(format))
4821 : {
4822 73 : ArrowTimestampToOGRDateTime(
4823 73 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4824 : 1000, GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4825 : }
4826 685 : else if (IsTimestampMicroseconds(format))
4827 : {
4828 34 : ArrowTimestampToOGRDateTime(
4829 34 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4830 : 1000 * 1000, GetTimestampTimezone(format), oFeature,
4831 : iOGRFieldIndex);
4832 : }
4833 651 : else if (IsTimestampNanoseconds(format))
4834 : {
4835 0 : ArrowTimestampToOGRDateTime(
4836 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4837 : 1000 * 1000 * 1000, GetTimestampTimezone(format), oFeature,
4838 : iOGRFieldIndex);
4839 : }
4840 651 : else if (IsFixedSizeList(format))
4841 : {
4842 154 : const int nItems = GetFixedSizeList(format);
4843 154 : const auto childArray = array->children[0];
4844 154 : const char *childFormat = schema->children[0]->format;
4845 154 : if (IsBoolean(childFormat))
4846 : {
4847 24 : std::vector<int> aValues;
4848 12 : const auto *paValues =
4849 12 : static_cast<const uint8_t *>(childArray->buffers[1]);
4850 36 : for (int i = 0; i < nItems; ++i)
4851 : {
4852 24 : aValues.push_back(
4853 24 : TestBit(paValues,
4854 24 : static_cast<size_t>(childArray->offset +
4855 24 : nOffsettedIndex * nItems + i))
4856 24 : ? 1
4857 : : 0);
4858 : }
4859 12 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4860 12 : aValues.data());
4861 : }
4862 142 : else if (IsInt8(childFormat))
4863 : {
4864 12 : FillFieldFixedSizeList<int8_t, int>(array, iOGRFieldIndex,
4865 : nOffsettedIndex, nItems,
4866 : childArray, oFeature);
4867 : }
4868 130 : else if (IsUInt8(childFormat))
4869 : {
4870 12 : FillFieldFixedSizeList<uint8_t, int>(array, iOGRFieldIndex,
4871 : nOffsettedIndex, nItems,
4872 : childArray, oFeature);
4873 : }
4874 118 : else if (IsInt16(childFormat))
4875 : {
4876 12 : FillFieldFixedSizeList<int16_t, int>(array, iOGRFieldIndex,
4877 : nOffsettedIndex, nItems,
4878 : childArray, oFeature);
4879 : }
4880 106 : else if (IsUInt16(childFormat))
4881 : {
4882 12 : FillFieldFixedSizeList<uint16_t, int>(array, iOGRFieldIndex,
4883 : nOffsettedIndex, nItems,
4884 : childArray, oFeature);
4885 : }
4886 94 : else if (IsInt32(childFormat))
4887 : {
4888 12 : FillFieldFixedSizeList<int32_t, int>(array, iOGRFieldIndex,
4889 : nOffsettedIndex, nItems,
4890 : childArray, oFeature);
4891 : }
4892 82 : else if (IsUInt32(childFormat))
4893 : {
4894 5 : FillFieldFixedSizeList<uint32_t, GIntBig>(array, iOGRFieldIndex,
4895 : nOffsettedIndex, nItems,
4896 : childArray, oFeature);
4897 : }
4898 77 : else if (IsInt64(childFormat))
4899 : {
4900 19 : FillFieldFixedSizeList<int64_t, GIntBig>(array, iOGRFieldIndex,
4901 : nOffsettedIndex, nItems,
4902 : childArray, oFeature);
4903 : }
4904 58 : else if (IsUInt64(childFormat))
4905 : {
4906 12 : FillFieldFixedSizeList<uint64_t, double>(array, iOGRFieldIndex,
4907 : nOffsettedIndex, nItems,
4908 : childArray, oFeature);
4909 : }
4910 46 : else if (IsFloat16(childFormat))
4911 : {
4912 10 : std::vector<double> aValues;
4913 15 : for (int i = 0; i < nItems; ++i)
4914 : {
4915 10 : aValues.push_back(static_cast<double>(
4916 10 : GetValueFloat16(childArray, nOffsettedIndex * nItems + i)));
4917 : }
4918 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4919 5 : aValues.data());
4920 : }
4921 41 : else if (IsFloat32(childFormat))
4922 : {
4923 12 : FillFieldFixedSizeList<float, double>(array, iOGRFieldIndex,
4924 : nOffsettedIndex, nItems,
4925 : childArray, oFeature);
4926 : }
4927 29 : else if (IsFloat64(childFormat))
4928 : {
4929 12 : FillFieldFixedSizeList<double, double>(array, iOGRFieldIndex,
4930 : nOffsettedIndex, nItems,
4931 : childArray, oFeature);
4932 : }
4933 17 : else if (IsString(childFormat))
4934 : {
4935 12 : FillFieldFixedSizeListString<uint32_t>(array, iOGRFieldIndex,
4936 : nOffsettedIndex, nItems,
4937 : childArray, oFeature);
4938 : }
4939 5 : else if (IsLargeString(childFormat))
4940 : {
4941 5 : FillFieldFixedSizeListString<uint64_t>(array, iOGRFieldIndex,
4942 : nOffsettedIndex, nItems,
4943 : childArray, oFeature);
4944 : }
4945 0 : else if (IsStringView(childFormat))
4946 : {
4947 0 : FillFieldFixedSizeListStringView(array, iOGRFieldIndex,
4948 : nOffsettedIndex, nItems,
4949 : childArray, oFeature);
4950 : }
4951 : }
4952 497 : else if (IsList(format) || IsLargeList(format))
4953 : {
4954 254 : const auto childArray = array->children[0];
4955 254 : const char *childFormat = schema->children[0]->format;
4956 254 : if (IsBoolean(childFormat))
4957 : {
4958 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4959 12 : FillFieldListFromBool<uint32_t>(array, iOGRFieldIndex,
4960 : nOffsettedIndex, childArray,
4961 : oFeature);
4962 : else
4963 4 : FillFieldListFromBool<uint64_t>(array, iOGRFieldIndex,
4964 : nOffsettedIndex, childArray,
4965 : oFeature);
4966 : }
4967 238 : else if (IsInt8(childFormat))
4968 : {
4969 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4970 10 : FillFieldList<uint32_t, int8_t, int>(array, iOGRFieldIndex,
4971 : nOffsettedIndex,
4972 : childArray, oFeature);
4973 : else
4974 4 : FillFieldList<uint64_t, int8_t, int>(array, iOGRFieldIndex,
4975 : nOffsettedIndex,
4976 : childArray, oFeature);
4977 : }
4978 224 : else if (IsUInt8(childFormat))
4979 : {
4980 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4981 14 : FillFieldList<uint32_t, uint8_t, int>(array, iOGRFieldIndex,
4982 : nOffsettedIndex,
4983 : childArray, oFeature);
4984 : else
4985 4 : FillFieldList<uint64_t, uint8_t, int>(array, iOGRFieldIndex,
4986 : nOffsettedIndex,
4987 : childArray, oFeature);
4988 : }
4989 206 : else if (IsInt16(childFormat))
4990 : {
4991 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4992 12 : FillFieldList<uint32_t, int16_t, int>(array, iOGRFieldIndex,
4993 : nOffsettedIndex,
4994 : childArray, oFeature);
4995 : else
4996 4 : FillFieldList<uint64_t, int16_t, int>(array, iOGRFieldIndex,
4997 : nOffsettedIndex,
4998 : childArray, oFeature);
4999 : }
5000 190 : else if (IsUInt16(childFormat))
5001 : {
5002 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
5003 10 : FillFieldList<uint32_t, uint16_t, int>(array, iOGRFieldIndex,
5004 : nOffsettedIndex,
5005 : childArray, oFeature);
5006 : else
5007 4 : FillFieldList<uint64_t, uint16_t, int>(array, iOGRFieldIndex,
5008 : nOffsettedIndex,
5009 : childArray, oFeature);
5010 : }
5011 176 : else if (IsInt32(childFormat))
5012 : {
5013 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
5014 14 : FillFieldList<uint32_t, int32_t, int>(array, iOGRFieldIndex,
5015 : nOffsettedIndex,
5016 : childArray, oFeature);
5017 : else
5018 4 : FillFieldList<uint64_t, int32_t, int>(array, iOGRFieldIndex,
5019 : nOffsettedIndex,
5020 : childArray, oFeature);
5021 : }
5022 158 : else if (IsUInt32(childFormat))
5023 : {
5024 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
5025 4 : FillFieldList<uint32_t, uint32_t, GIntBig>(
5026 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5027 : oFeature);
5028 : else
5029 4 : FillFieldList<uint64_t, uint32_t, GIntBig>(
5030 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5031 : oFeature);
5032 : }
5033 150 : else if (IsInt64(childFormat))
5034 : {
5035 31 : if (format[1] == ARROW_2ND_LETTER_LIST)
5036 27 : FillFieldList<uint32_t, int64_t, GIntBig>(array, iOGRFieldIndex,
5037 : nOffsettedIndex,
5038 : childArray, oFeature);
5039 : else
5040 4 : FillFieldList<uint64_t, int64_t, GIntBig>(array, iOGRFieldIndex,
5041 : nOffsettedIndex,
5042 : childArray, oFeature);
5043 : }
5044 119 : else if (IsUInt64(childFormat)) // (lossy conversion)
5045 : {
5046 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
5047 10 : FillFieldList<uint32_t, uint64_t, double>(array, iOGRFieldIndex,
5048 : nOffsettedIndex,
5049 : childArray, oFeature);
5050 : else
5051 4 : FillFieldList<uint64_t, uint64_t, double>(array, iOGRFieldIndex,
5052 : nOffsettedIndex,
5053 : childArray, oFeature);
5054 : }
5055 105 : else if (IsFloat16(childFormat))
5056 : {
5057 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
5058 4 : FillFieldListFromHalfFloat<uint32_t>(array, iOGRFieldIndex,
5059 : nOffsettedIndex,
5060 : childArray, oFeature);
5061 : else
5062 4 : FillFieldListFromHalfFloat<uint64_t>(array, iOGRFieldIndex,
5063 : nOffsettedIndex,
5064 : childArray, oFeature);
5065 : }
5066 97 : else if (IsFloat32(childFormat))
5067 : {
5068 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
5069 12 : FillFieldList<uint32_t, float, double>(array, iOGRFieldIndex,
5070 : nOffsettedIndex,
5071 : childArray, oFeature);
5072 : else
5073 4 : FillFieldList<uint64_t, float, double>(array, iOGRFieldIndex,
5074 : nOffsettedIndex,
5075 : childArray, oFeature);
5076 : }
5077 81 : else if (IsFloat64(childFormat))
5078 : {
5079 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
5080 14 : FillFieldList<uint32_t, double, double>(array, iOGRFieldIndex,
5081 : nOffsettedIndex,
5082 : childArray, oFeature);
5083 : else
5084 4 : FillFieldList<uint64_t, double, double>(array, iOGRFieldIndex,
5085 : nOffsettedIndex,
5086 : childArray, oFeature);
5087 : }
5088 63 : else if (IsString(childFormat))
5089 : {
5090 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
5091 14 : FillFieldListFromString<uint32_t, uint32_t>(
5092 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5093 : oFeature);
5094 : else
5095 4 : FillFieldListFromString<uint64_t, uint32_t>(
5096 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5097 : oFeature);
5098 : }
5099 45 : else if (IsLargeString(childFormat))
5100 : {
5101 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
5102 10 : FillFieldListFromString<uint32_t, uint64_t>(
5103 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5104 : oFeature);
5105 : else
5106 4 : FillFieldListFromString<uint64_t, uint64_t>(
5107 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5108 : oFeature);
5109 : }
5110 31 : else if (IsStringView(childFormat))
5111 : {
5112 0 : if (format[1] == ARROW_2ND_LETTER_LIST)
5113 0 : FillFieldListFromStringView<uint32_t>(array, iOGRFieldIndex,
5114 : nOffsettedIndex,
5115 : childArray, oFeature);
5116 : else
5117 0 : FillFieldListFromStringView<uint64_t>(array, iOGRFieldIndex,
5118 : nOffsettedIndex,
5119 : childArray, oFeature);
5120 : }
5121 :
5122 31 : else if (format[1] == ARROW_2ND_LETTER_LIST)
5123 : {
5124 31 : const size_t iFeature =
5125 31 : static_cast<size_t>(nOffsettedIndex - array->offset);
5126 31 : oFeature.SetField(iOGRFieldIndex,
5127 62 : GetListAsJSON<uint32_t>(schema, array, iFeature)
5128 62 : .Format(CPLJSONObject::PrettyFormat::Plain)
5129 : .c_str());
5130 : }
5131 : else
5132 : {
5133 0 : const size_t iFeature =
5134 0 : static_cast<size_t>(nOffsettedIndex - array->offset);
5135 0 : oFeature.SetField(iOGRFieldIndex,
5136 0 : GetListAsJSON<uint64_t>(schema, array, iFeature)
5137 0 : .Format(CPLJSONObject::PrettyFormat::Plain)
5138 : .c_str());
5139 : }
5140 : }
5141 243 : else if (IsDecimal(format))
5142 : {
5143 4 : int nPrecision = 0;
5144 4 : int nScale = 0;
5145 4 : int nWidthInBytes = 0;
5146 4 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
5147 : {
5148 0 : CPLAssert(false);
5149 : }
5150 :
5151 : // fits on a int64
5152 4 : CPLAssert(nPrecision <= 19);
5153 : // either 128 or 256 bits
5154 4 : CPLAssert((nWidthInBytes % 8) == 0);
5155 4 : const int nWidthIn64BitWord = nWidthInBytes / 8;
5156 4 : const size_t iFeature =
5157 4 : static_cast<size_t>(nOffsettedIndex - array->offset);
5158 4 : oFeature.SetField(
5159 : iOGRFieldIndex,
5160 : GetValueDecimal(array, nWidthIn64BitWord, nScale, iFeature));
5161 4 : return true;
5162 : }
5163 239 : else if (IsMap(format))
5164 : {
5165 239 : const size_t iFeature =
5166 239 : static_cast<size_t>(nOffsettedIndex - array->offset);
5167 239 : oFeature.SetField(iOGRFieldIndex,
5168 478 : GetMapAsJSON(schema, array, iFeature)
5169 478 : .Format(CPLJSONObject::PrettyFormat::Plain)
5170 : .c_str());
5171 : }
5172 : else
5173 : {
5174 0 : return false;
5175 : }
5176 :
5177 819 : return true;
5178 : }
5179 :
5180 : /************************************************************************/
5181 : /* FillValidityArrayFromAttrQuery() */
5182 : /************************************************************************/
5183 :
5184 134 : static size_t FillValidityArrayFromAttrQuery(
5185 : const OGRLayer *poLayer, OGRFeatureQuery *poAttrQuery,
5186 : const struct ArrowSchema *schema, struct ArrowArray *array,
5187 : std::vector<bool> &abyValidityFromFilters, CSLConstList papszOptions)
5188 : {
5189 134 : size_t nCountIntersecting = 0;
5190 134 : auto poFeatureDefn = const_cast<OGRLayer *>(poLayer)->GetLayerDefn();
5191 268 : OGRFeature oFeature(poFeatureDefn);
5192 :
5193 268 : std::map<std::string, std::vector<int>> oMapFieldNameToArrowPath;
5194 268 : std::vector<int> anArrowPathTmp;
5195 134 : BuildMapFieldNameToArrowPath(schema, oMapFieldNameToArrowPath,
5196 268 : std::string(), anArrowPathTmp);
5197 :
5198 : struct UsedFieldsInfo
5199 : {
5200 : int iOGRFieldIndex{};
5201 : std::vector<int> anArrowPath{};
5202 : };
5203 :
5204 268 : std::vector<UsedFieldsInfo> aoUsedFieldsInfo;
5205 :
5206 134 : bool bNeedsFID = false;
5207 268 : const CPLStringList aosUsedFields(poAttrQuery->GetUsedFields());
5208 252 : for (int i = 0; i < aosUsedFields.size(); ++i)
5209 : {
5210 118 : int iOGRFieldIndex = poFeatureDefn->GetFieldIndex(aosUsedFields[i]);
5211 118 : if (iOGRFieldIndex >= 0)
5212 : {
5213 112 : const auto oIter = oMapFieldNameToArrowPath.find(aosUsedFields[i]);
5214 112 : if (oIter != oMapFieldNameToArrowPath.end())
5215 : {
5216 224 : UsedFieldsInfo info;
5217 112 : info.iOGRFieldIndex = iOGRFieldIndex;
5218 112 : info.anArrowPath = oIter->second;
5219 112 : aoUsedFieldsInfo.push_back(std::move(info));
5220 : }
5221 : else
5222 : {
5223 0 : CPLError(CE_Failure, CPLE_AppDefined,
5224 : "Cannot find %s in oMapFieldNameToArrowPath",
5225 : aosUsedFields[i]);
5226 : }
5227 : }
5228 6 : else if (EQUAL(aosUsedFields[i], "FID"))
5229 : {
5230 6 : bNeedsFID = true;
5231 : }
5232 : else
5233 : {
5234 0 : CPLDebug("OGR", "Cannot find used field %s", aosUsedFields[i]);
5235 : }
5236 : }
5237 :
5238 134 : const size_t nLength = abyValidityFromFilters.size();
5239 :
5240 134 : GIntBig nBaseSeqFID = -1;
5241 268 : std::vector<int> anArrowPathToFIDColumn;
5242 134 : if (bNeedsFID)
5243 : {
5244 : // BASE_SEQUENTIAL_FID is set when there is no Arrow column for the FID
5245 : // and we assume sequential FID numbering
5246 : const char *pszBaseSeqFID =
5247 6 : CSLFetchNameValue(papszOptions, "BASE_SEQUENTIAL_FID");
5248 6 : if (pszBaseSeqFID)
5249 : {
5250 5 : nBaseSeqFID = CPLAtoGIntBig(pszBaseSeqFID);
5251 :
5252 : // Optimizimation for "FID = constant"
5253 : swq_expr_node *poNode =
5254 5 : static_cast<swq_expr_node *>(poAttrQuery->GetSWQExpr());
5255 15 : if (poNode->eNodeType == SNT_OPERATION &&
5256 5 : poNode->nOperation == SWQ_EQ && poNode->nSubExprCount == 2 &&
5257 2 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
5258 2 : poNode->papoSubExpr[1]->eNodeType == SNT_CONSTANT &&
5259 2 : poNode->papoSubExpr[0]->field_index ==
5260 12 : poFeatureDefn->GetFieldCount() + SPF_FID &&
5261 2 : poNode->papoSubExpr[1]->field_type == SWQ_INTEGER64)
5262 : {
5263 2 : if (nBaseSeqFID + static_cast<int64_t>(nLength) <
5264 2 : poNode->papoSubExpr[1]->int_value ||
5265 2 : nBaseSeqFID > poNode->papoSubExpr[1]->int_value)
5266 : {
5267 0 : return 0;
5268 : }
5269 : }
5270 : }
5271 : else
5272 : {
5273 : const char *pszFIDColumn =
5274 1 : const_cast<OGRLayer *>(poLayer)->GetFIDColumn();
5275 1 : if (pszFIDColumn && pszFIDColumn[0])
5276 : {
5277 1 : const auto oIter = oMapFieldNameToArrowPath.find(pszFIDColumn);
5278 1 : if (oIter != oMapFieldNameToArrowPath.end())
5279 : {
5280 1 : anArrowPathToFIDColumn = oIter->second;
5281 : }
5282 : }
5283 1 : if (anArrowPathToFIDColumn.empty())
5284 : {
5285 0 : CPLError(CE_Failure, CPLE_AppDefined,
5286 : "Filtering on FID requested but cannot associate a "
5287 : "FID with Arrow records");
5288 : }
5289 : }
5290 : }
5291 :
5292 555 : for (size_t iRow = 0; iRow < nLength; ++iRow)
5293 : {
5294 421 : if (!abyValidityFromFilters[iRow])
5295 2 : continue;
5296 :
5297 419 : if (bNeedsFID)
5298 : {
5299 21 : if (nBaseSeqFID >= 0)
5300 : {
5301 11 : oFeature.SetFID(nBaseSeqFID + iRow);
5302 : }
5303 10 : else if (!anArrowPathToFIDColumn.empty())
5304 : {
5305 10 : oFeature.SetFID(OGRNullFID);
5306 :
5307 10 : const struct ArrowSchema *psSchemaField = schema;
5308 10 : const struct ArrowArray *psArray = array;
5309 10 : bool bSkip = false;
5310 20 : for (size_t i = 0; i < anArrowPathToFIDColumn.size(); ++i)
5311 : {
5312 10 : const int iChild = anArrowPathToFIDColumn[i];
5313 10 : if (i > 0)
5314 : {
5315 0 : const uint8_t *pabyValidity =
5316 0 : psArray->null_count == 0
5317 0 : ? nullptr
5318 : : static_cast<uint8_t *>(
5319 0 : const_cast<void *>(psArray->buffers[0]));
5320 0 : const size_t nOffsettedIndex =
5321 0 : static_cast<size_t>(iRow + psArray->offset);
5322 0 : if (pabyValidity &&
5323 0 : !TestBit(pabyValidity, nOffsettedIndex))
5324 : {
5325 0 : bSkip = true;
5326 0 : break;
5327 : }
5328 : }
5329 :
5330 10 : psSchemaField = psSchemaField->children[iChild];
5331 10 : psArray = psArray->children[iChild];
5332 : }
5333 10 : if (bSkip)
5334 0 : continue;
5335 :
5336 10 : const char *format = psSchemaField->format;
5337 10 : const uint8_t *pabyValidity =
5338 10 : psArray->null_count == 0
5339 10 : ? nullptr
5340 : : static_cast<uint8_t *>(
5341 0 : const_cast<void *>(psArray->buffers[0]));
5342 10 : const size_t nOffsettedIndex =
5343 10 : static_cast<size_t>(iRow + psArray->offset);
5344 10 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5345 : {
5346 : // do nothing
5347 : }
5348 10 : else if (IsInt32(format))
5349 : {
5350 0 : oFeature.SetFID(static_cast<const int32_t *>(
5351 0 : psArray->buffers[1])[nOffsettedIndex]);
5352 : }
5353 10 : else if (IsInt64(format))
5354 : {
5355 10 : oFeature.SetFID(static_cast<const int64_t *>(
5356 10 : psArray->buffers[1])[nOffsettedIndex]);
5357 : }
5358 : }
5359 : }
5360 :
5361 725 : for (const auto &sInfo : aoUsedFieldsInfo)
5362 : {
5363 306 : const int iOGRFieldIndex = sInfo.iOGRFieldIndex;
5364 306 : const struct ArrowSchema *psSchemaField = schema;
5365 306 : const struct ArrowArray *psArray = array;
5366 306 : bool bSkip = false;
5367 612 : for (size_t i = 0; i < sInfo.anArrowPath.size(); ++i)
5368 : {
5369 306 : const int iChild = sInfo.anArrowPath[i];
5370 306 : if (i > 0)
5371 : {
5372 0 : const uint8_t *pabyValidity =
5373 0 : psArray->null_count == 0
5374 0 : ? nullptr
5375 : : static_cast<uint8_t *>(
5376 0 : const_cast<void *>(psArray->buffers[0]));
5377 0 : const size_t nOffsettedIndex =
5378 0 : static_cast<size_t>(iRow + psArray->offset);
5379 0 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5380 : {
5381 0 : bSkip = true;
5382 0 : oFeature.SetFieldNull(iOGRFieldIndex);
5383 0 : break;
5384 : }
5385 : }
5386 :
5387 306 : psSchemaField = psSchemaField->children[iChild];
5388 306 : psArray = psArray->children[iChild];
5389 : }
5390 306 : if (bSkip)
5391 0 : continue;
5392 :
5393 306 : const char *format = psSchemaField->format;
5394 306 : const uint8_t *pabyValidity =
5395 306 : psArray->null_count == 0
5396 306 : ? nullptr
5397 : : static_cast<uint8_t *>(
5398 129 : const_cast<void *>(psArray->buffers[0]));
5399 306 : const size_t nOffsettedIndex =
5400 306 : static_cast<size_t>(iRow + psArray->offset);
5401 306 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5402 : {
5403 38 : oFeature.SetFieldNull(iOGRFieldIndex);
5404 : }
5405 268 : else if (IsBoolean(format))
5406 : {
5407 78 : oFeature.SetField(
5408 : iOGRFieldIndex,
5409 78 : TestBit(static_cast<const uint8_t *>(psArray->buffers[1]),
5410 : nOffsettedIndex));
5411 : }
5412 190 : else if (IsInt8(format))
5413 : {
5414 8 : oFeature.SetField(iOGRFieldIndex,
5415 8 : static_cast<const int8_t *>(
5416 8 : psArray->buffers[1])[nOffsettedIndex]);
5417 : }
5418 182 : else if (IsUInt8(format))
5419 : {
5420 4 : oFeature.SetField(iOGRFieldIndex,
5421 4 : static_cast<const uint8_t *>(
5422 4 : psArray->buffers[1])[nOffsettedIndex]);
5423 : }
5424 178 : else if (IsInt16(format))
5425 : {
5426 16 : oFeature.SetField(iOGRFieldIndex,
5427 16 : static_cast<const int16_t *>(
5428 16 : psArray->buffers[1])[nOffsettedIndex]);
5429 : }
5430 162 : else if (IsUInt16(format))
5431 : {
5432 2 : oFeature.SetField(iOGRFieldIndex,
5433 2 : static_cast<const uint16_t *>(
5434 2 : psArray->buffers[1])[nOffsettedIndex]);
5435 : }
5436 160 : else if (IsInt32(format))
5437 : {
5438 10 : oFeature.SetField(iOGRFieldIndex,
5439 10 : static_cast<const int32_t *>(
5440 10 : psArray->buffers[1])[nOffsettedIndex]);
5441 : }
5442 150 : else if (IsUInt32(format))
5443 : {
5444 0 : oFeature.SetField(
5445 : iOGRFieldIndex,
5446 0 : static_cast<GIntBig>(static_cast<const uint32_t *>(
5447 0 : psArray->buffers[1])[nOffsettedIndex]));
5448 : }
5449 150 : else if (IsInt64(format))
5450 : {
5451 4 : oFeature.SetField(
5452 : iOGRFieldIndex,
5453 4 : static_cast<GIntBig>(static_cast<const int64_t *>(
5454 4 : psArray->buffers[1])[nOffsettedIndex]));
5455 : }
5456 146 : else if (IsUInt64(format))
5457 : {
5458 4 : oFeature.SetField(
5459 : iOGRFieldIndex,
5460 4 : static_cast<double>(static_cast<const uint64_t *>(
5461 4 : psArray->buffers[1])[nOffsettedIndex]));
5462 : }
5463 142 : else if (IsFloat32(format))
5464 : {
5465 2 : oFeature.SetField(
5466 : iOGRFieldIndex,
5467 2 : static_cast<double>(static_cast<const float *>(
5468 2 : psArray->buffers[1])[nOffsettedIndex]));
5469 : }
5470 140 : else if (IsFloat64(format))
5471 : {
5472 26 : oFeature.SetField(iOGRFieldIndex,
5473 26 : static_cast<const double *>(
5474 26 : psArray->buffers[1])[nOffsettedIndex]);
5475 : }
5476 114 : else if (IsString(format))
5477 : {
5478 18 : const auto nOffset = static_cast<const uint32_t *>(
5479 18 : psArray->buffers[1])[nOffsettedIndex];
5480 18 : const auto nNextOffset = static_cast<const uint32_t *>(
5481 18 : psArray->buffers[1])[nOffsettedIndex + 1];
5482 18 : const GByte *pabyData =
5483 18 : static_cast<const GByte *>(psArray->buffers[2]);
5484 18 : const uint32_t nSize = nNextOffset - nOffset;
5485 18 : CPLAssert(oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() ==
5486 : OFTString);
5487 18 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5488 18 : memcpy(pszStr, pabyData + nOffset, nSize);
5489 18 : pszStr[nSize] = 0;
5490 18 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5491 18 : if (IsValidField(psField))
5492 12 : CPLFree(psField->String);
5493 18 : psField->String = pszStr;
5494 : }
5495 96 : else if (IsStringView(format))
5496 : {
5497 : // Cf https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout
5498 0 : CPLAssert(oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() ==
5499 : OFTString);
5500 0 : const auto strView = GetStringView(psArray, iRow);
5501 0 : const auto nSize = strView.size();
5502 0 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5503 0 : memcpy(pszStr, strView.data(), nSize);
5504 0 : pszStr[nSize] = 0;
5505 0 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5506 0 : if (IsValidField(psField))
5507 0 : CPLFree(psField->String);
5508 0 : psField->String = pszStr;
5509 : }
5510 96 : else if (IsLargeString(format))
5511 : {
5512 6 : const auto nOffset = static_cast<const uint64_t *>(
5513 6 : psArray->buffers[1])[nOffsettedIndex];
5514 6 : const auto nNextOffset = static_cast<const uint64_t *>(
5515 6 : psArray->buffers[1])[nOffsettedIndex + 1];
5516 6 : const GByte *pabyData =
5517 6 : static_cast<const GByte *>(psArray->buffers[2]);
5518 6 : const size_t nSize = static_cast<size_t>(nNextOffset - nOffset);
5519 6 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5520 6 : memcpy(pszStr, pabyData + static_cast<size_t>(nOffset), nSize);
5521 6 : pszStr[nSize] = 0;
5522 6 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5523 6 : if (IsValidField(psField))
5524 3 : CPLFree(psField->String);
5525 6 : psField->String = pszStr;
5526 : }
5527 90 : else if (IsBinary(format))
5528 : {
5529 5 : const auto nOffset = static_cast<const uint32_t *>(
5530 5 : psArray->buffers[1])[nOffsettedIndex];
5531 5 : const auto nNextOffset = static_cast<const uint32_t *>(
5532 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5533 5 : const GByte *pabyData =
5534 5 : static_cast<const GByte *>(psArray->buffers[2]);
5535 5 : const uint32_t nSize = nNextOffset - nOffset;
5536 10 : if (nSize >
5537 5 : static_cast<size_t>(std::numeric_limits<int32_t>::max()))
5538 : {
5539 0 : abyValidityFromFilters.clear();
5540 0 : abyValidityFromFilters.resize(nLength);
5541 0 : CPLError(CE_Failure, CPLE_AppDefined,
5542 : "Unexpected error in PostFilterArrowArray(): too "
5543 : "large binary");
5544 0 : return 0;
5545 : }
5546 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5547 5 : pabyData + nOffset);
5548 : }
5549 85 : else if (IsLargeBinary(format))
5550 : {
5551 5 : const auto nOffset = static_cast<const uint64_t *>(
5552 5 : psArray->buffers[1])[nOffsettedIndex];
5553 5 : const auto nNextOffset = static_cast<const uint64_t *>(
5554 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5555 5 : const GByte *pabyData =
5556 5 : static_cast<const GByte *>(psArray->buffers[2]);
5557 5 : const uint64_t nSize = nNextOffset - nOffset;
5558 5 : if (nSize >
5559 5 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
5560 : {
5561 0 : abyValidityFromFilters.clear();
5562 0 : abyValidityFromFilters.resize(nLength);
5563 0 : CPLError(CE_Failure, CPLE_AppDefined,
5564 : "Unexpected error in PostFilterArrowArray(): too "
5565 : "large binary");
5566 0 : return 0;
5567 : }
5568 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5569 5 : pabyData + nOffset);
5570 : }
5571 80 : else if (!SetFieldForOtherFormats(oFeature, iOGRFieldIndex,
5572 : nOffsettedIndex, psSchemaField,
5573 : psArray))
5574 : {
5575 0 : abyValidityFromFilters.clear();
5576 0 : abyValidityFromFilters.resize(nLength);
5577 0 : CPLError(
5578 : CE_Failure, CPLE_AppDefined,
5579 : "Unexpected error in PostFilterArrowArray(): unhandled "
5580 : "field format: %s",
5581 : format);
5582 0 : return 0;
5583 : }
5584 : }
5585 419 : if (poAttrQuery->Evaluate(&oFeature))
5586 : {
5587 215 : nCountIntersecting++;
5588 : }
5589 : else
5590 : {
5591 204 : abyValidityFromFilters[iRow] = false;
5592 : }
5593 : }
5594 134 : return nCountIntersecting;
5595 : }
5596 :
5597 : /************************************************************************/
5598 : /* OGRLayer::PostFilterArrowArray() */
5599 : /************************************************************************/
5600 :
5601 : /** Remove rows that aren't selected by the spatial or attribute filter.
5602 : *
5603 : * Assumes that CanPostFilterArrowArray() has been called and returned true.
5604 : */
5605 153 : void OGRLayer::PostFilterArrowArray(const struct ArrowSchema *schema,
5606 : struct ArrowArray *array,
5607 : CSLConstList papszOptions) const
5608 : {
5609 153 : if (!m_poFilterGeom && !m_poAttrQuery)
5610 43 : return;
5611 :
5612 153 : CPLAssert(schema->n_children == array->n_children);
5613 :
5614 153 : int64_t iGeomField = -1;
5615 153 : if (m_poFilterGeom)
5616 : {
5617 : const char *pszGeomFieldName =
5618 : const_cast<OGRLayer *>(this)
5619 21 : ->GetLayerDefn()
5620 21 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
5621 21 : ->GetNameRef();
5622 837 : for (int64_t iField = 0; iField < schema->n_children; ++iField)
5623 : {
5624 837 : const auto fieldSchema = schema->children[iField];
5625 837 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
5626 : {
5627 21 : iGeomField = iField;
5628 21 : break;
5629 : }
5630 816 : CPLAssert(array->children[iField]->length ==
5631 : array->children[0]->length);
5632 : }
5633 : // Guaranteed if CanPostFilterArrowArray() returned true
5634 21 : CPLAssert(iGeomField >= 0);
5635 21 : CPLAssert(IsBinary(schema->children[iGeomField]->format) ||
5636 : IsLargeBinary(schema->children[iGeomField]->format));
5637 21 : CPLAssert(array->children[iGeomField]->n_buffers == 3);
5638 : }
5639 :
5640 153 : std::vector<bool> abyValidityFromFilters;
5641 153 : const size_t nLength = static_cast<size_t>(array->length);
5642 : const size_t nCountIntersectingGeom =
5643 174 : m_poFilterGeom ? (IsBinary(schema->children[iGeomField]->format)
5644 42 : ? FillValidityArrayFromWKBArray<uint32_t>(
5645 21 : array->children[iGeomField], this,
5646 : abyValidityFromFilters)
5647 0 : : FillValidityArrayFromWKBArray<uint64_t>(
5648 0 : array->children[iGeomField], this,
5649 : abyValidityFromFilters))
5650 153 : : nLength;
5651 153 : if (!m_poFilterGeom)
5652 132 : abyValidityFromFilters.resize(nLength, true);
5653 : const size_t nCountIntersecting =
5654 134 : m_poAttrQuery && nCountIntersectingGeom > 0
5655 306 : ? FillValidityArrayFromAttrQuery(this, m_poAttrQuery, schema, array,
5656 : abyValidityFromFilters,
5657 : papszOptions)
5658 19 : : m_poFilterGeom ? nCountIntersectingGeom
5659 153 : : nLength;
5660 : // Nothing to do ?
5661 153 : if (nCountIntersecting == nLength)
5662 : {
5663 : // CPLDebug("OGR", "All rows match filter");
5664 43 : return;
5665 : }
5666 :
5667 110 : if (nCountIntersecting == 0)
5668 : {
5669 27 : array->length = 0;
5670 : }
5671 83 : else if (!CompactStructArray(schema, array, 0, abyValidityFromFilters,
5672 : nCountIntersecting))
5673 : {
5674 0 : array->release(array);
5675 0 : memset(array, 0, sizeof(*array));
5676 : }
5677 : }
5678 :
5679 : /************************************************************************/
5680 : /* OGRCloneArrowArray */
5681 : /************************************************************************/
5682 :
5683 14093 : static bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5684 : const struct ArrowArray *src_array,
5685 : struct ArrowArray *out_array,
5686 : size_t nParentOffset)
5687 : {
5688 14093 : memset(out_array, 0, sizeof(*out_array));
5689 14093 : const size_t nLength =
5690 14093 : static_cast<size_t>(src_array->length) - nParentOffset;
5691 14093 : out_array->length = nLength;
5692 14093 : out_array->null_count = src_array->null_count;
5693 14093 : out_array->release = OGRLayerDefaultReleaseArray;
5694 :
5695 14093 : bool bRet = true;
5696 :
5697 14093 : out_array->n_buffers = src_array->n_buffers;
5698 28186 : out_array->buffers = static_cast<const void **>(CPLCalloc(
5699 14093 : static_cast<size_t>(src_array->n_buffers), sizeof(const void *)));
5700 14093 : CPLAssert(static_cast<size_t>(src_array->length) >= nParentOffset);
5701 14093 : const char *format = schema->format;
5702 14093 : const auto nOffset = static_cast<size_t>(src_array->offset) + nParentOffset;
5703 41917 : for (int64_t i = 0; i < src_array->n_buffers; ++i)
5704 : {
5705 27824 : if (i == 0 || IsBoolean(format))
5706 : {
5707 14464 : if (i == 1)
5708 : {
5709 371 : CPLAssert(src_array->buffers[i]);
5710 : }
5711 14464 : if (src_array->buffers[i])
5712 : {
5713 8911 : const size_t nBytes = nLength ? (nLength + 7) / 8 : 1;
5714 : uint8_t *CPL_RESTRICT p = static_cast<uint8_t *>(
5715 8911 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nBytes));
5716 8911 : if (!p)
5717 : {
5718 0 : bRet = false;
5719 0 : break;
5720 : }
5721 8911 : const auto *CPL_RESTRICT pSrcArray =
5722 8911 : static_cast<const uint8_t *>(src_array->buffers[i]);
5723 8911 : if ((nOffset % 8) != 0)
5724 : {
5725 : // Make sure last byte is fully initialized
5726 2281 : p[nBytes - 1] = 0;
5727 7359 : for (size_t iRow = 0; iRow < nLength; ++iRow)
5728 : {
5729 5078 : if (TestBit(pSrcArray, nOffset + iRow))
5730 4949 : SetBit(p, iRow);
5731 : else
5732 129 : UnsetBit(p, iRow);
5733 : }
5734 : }
5735 : else
5736 : {
5737 6630 : memcpy(p, pSrcArray + nOffset / 8, nBytes);
5738 : }
5739 8911 : out_array->buffers[i] = p;
5740 : }
5741 : }
5742 13360 : else if (i == 1)
5743 : {
5744 11229 : CPLAssert(src_array->buffers[i]);
5745 11229 : size_t nEltSize = 0;
5746 11229 : size_t nExtraElt = 0;
5747 11229 : if (IsUInt8(format) || IsInt8(format))
5748 742 : nEltSize = sizeof(uint8_t);
5749 10487 : else if (IsUInt16(format) || IsInt16(format) || IsFloat16(format))
5750 762 : nEltSize = sizeof(uint16_t);
5751 19430 : else if (IsUInt32(format) || IsInt32(format) || IsFloat32(format) ||
5752 28056 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
5753 8626 : strcmp(format, "ttm") == 0)
5754 : {
5755 1316 : nEltSize = sizeof(uint32_t);
5756 : }
5757 13011 : else if (IsString(format) || IsBinary(format) || IsList(format) ||
5758 4602 : IsMap(format))
5759 : {
5760 4496 : nEltSize = sizeof(uint32_t);
5761 4496 : nExtraElt = 1;
5762 : }
5763 7455 : else if (IsUInt64(format) || IsInt64(format) || IsFloat64(format) ||
5764 1648 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
5765 7455 : strcmp(format, "ttn") == 0 || IsTimestamp(format))
5766 : {
5767 3085 : nEltSize = sizeof(uint64_t);
5768 : }
5769 1318 : else if (IsLargeString(format) || IsLargeBinary(format) ||
5770 490 : IsLargeList(format))
5771 : {
5772 343 : nEltSize = sizeof(uint64_t);
5773 343 : nExtraElt = 1;
5774 : }
5775 485 : else if (IsFixedWidthBinary(format))
5776 : {
5777 111 : nEltSize = GetFixedWithBinary(format);
5778 : }
5779 374 : else if (IsDecimal(format))
5780 : {
5781 374 : int nPrecision = 0;
5782 374 : int nScale = 0;
5783 374 : int nWidthInBytes = 0;
5784 374 : if (!ParseDecimalFormat(format, nPrecision, nScale,
5785 : nWidthInBytes))
5786 : {
5787 0 : CPLError(
5788 : CE_Failure, CPLE_AppDefined,
5789 : "Unexpected error in OGRCloneArrowArray(): unhandled "
5790 : "field format: %s",
5791 : format);
5792 :
5793 0 : return false;
5794 : }
5795 374 : nEltSize = nWidthInBytes;
5796 : }
5797 11229 : if (nEltSize)
5798 : {
5799 11229 : void *p = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
5800 : nLength ? nEltSize * (nLength + nExtraElt) : 1);
5801 11229 : if (!p)
5802 : {
5803 0 : bRet = false;
5804 0 : break;
5805 : }
5806 11229 : if (nLength)
5807 : {
5808 13022 : if ((IsString(format) || IsBinary(format)) &&
5809 1793 : static_cast<const uint32_t *>(
5810 1793 : src_array->buffers[1])[nOffset] != 0)
5811 : {
5812 258 : const auto *CPL_RESTRICT pSrcOffsets =
5813 258 : static_cast<const uint32_t *>(
5814 258 : src_array->buffers[1]) +
5815 : nOffset;
5816 258 : const auto nShiftOffset = pSrcOffsets[0];
5817 258 : auto *CPL_RESTRICT pDstOffsets =
5818 : static_cast<uint32_t *>(p);
5819 1118 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5820 : {
5821 860 : pDstOffsets[iRow] =
5822 860 : pSrcOffsets[iRow] - nShiftOffset;
5823 : }
5824 : }
5825 11309 : else if ((IsLargeString(format) || IsLargeBinary(format)) &&
5826 338 : static_cast<const uint64_t *>(
5827 338 : src_array->buffers[1])[nOffset] != 0)
5828 : {
5829 86 : const auto *CPL_RESTRICT pSrcOffsets =
5830 86 : static_cast<const uint64_t *>(
5831 86 : src_array->buffers[1]) +
5832 : nOffset;
5833 86 : const auto nShiftOffset = pSrcOffsets[0];
5834 86 : auto *CPL_RESTRICT pDstOffsets =
5835 : static_cast<uint64_t *>(p);
5836 344 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5837 : {
5838 258 : pDstOffsets[iRow] =
5839 258 : pSrcOffsets[iRow] - nShiftOffset;
5840 : }
5841 : }
5842 : else
5843 : {
5844 10885 : memcpy(
5845 : p,
5846 10885 : static_cast<const GByte *>(src_array->buffers[i]) +
5847 10885 : nEltSize * nOffset,
5848 10885 : nEltSize * (nLength + nExtraElt));
5849 : }
5850 : }
5851 11229 : out_array->buffers[i] = p;
5852 : }
5853 : else
5854 : {
5855 0 : CPLError(CE_Failure, CPLE_AppDefined,
5856 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5857 : "format = '%s', i = 1",
5858 0 : schema->name, format);
5859 0 : bRet = false;
5860 0 : break;
5861 : }
5862 : }
5863 2131 : else if (i == 2)
5864 : {
5865 2131 : CPLAssert(src_array->buffers[i]);
5866 2131 : size_t nSrcCharOffset = 0;
5867 2131 : size_t nCharCount = 0;
5868 2131 : if (IsString(format) || IsBinary(format))
5869 : {
5870 1793 : const auto *pSrcOffsets =
5871 1793 : static_cast<const uint32_t *>(src_array->buffers[1]) +
5872 : nOffset;
5873 1793 : nSrcCharOffset = pSrcOffsets[0];
5874 1793 : nCharCount = pSrcOffsets[nLength] - pSrcOffsets[0];
5875 : }
5876 338 : else if (IsLargeString(format) || IsLargeBinary(format))
5877 : {
5878 338 : const auto *pSrcOffsets =
5879 338 : static_cast<const uint64_t *>(src_array->buffers[1]) +
5880 : nOffset;
5881 338 : nSrcCharOffset = static_cast<size_t>(pSrcOffsets[0]);
5882 338 : nCharCount =
5883 338 : static_cast<size_t>(pSrcOffsets[nLength] - pSrcOffsets[0]);
5884 : }
5885 : else
5886 : {
5887 0 : CPLError(CE_Failure, CPLE_AppDefined,
5888 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5889 : "format = '%s', i = 2",
5890 0 : schema->name, format);
5891 0 : bRet = false;
5892 0 : break;
5893 : }
5894 : void *p =
5895 2131 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCharCount ? nCharCount : 1);
5896 2131 : if (!p)
5897 : {
5898 0 : bRet = false;
5899 0 : break;
5900 : }
5901 2131 : if (nCharCount)
5902 : {
5903 2131 : memcpy(p,
5904 2131 : static_cast<const GByte *>(src_array->buffers[i]) +
5905 : nSrcCharOffset,
5906 : nCharCount);
5907 : }
5908 2131 : out_array->buffers[i] = p;
5909 : }
5910 : else
5911 : {
5912 0 : CPLError(CE_Failure, CPLE_AppDefined,
5913 : "OGRCloneArrowArray(): unhandled case, array = %s, format "
5914 : "= '%s', i = 3",
5915 0 : schema->name, format);
5916 0 : bRet = false;
5917 0 : break;
5918 : }
5919 : }
5920 :
5921 14093 : if (bRet)
5922 : {
5923 14093 : out_array->n_children = src_array->n_children;
5924 14093 : out_array->children = static_cast<struct ArrowArray **>(
5925 14093 : CPLCalloc(static_cast<size_t>(src_array->n_children),
5926 : sizeof(struct ArrowArray *)));
5927 27945 : for (int64_t i = 0; i < src_array->n_children; ++i)
5928 : {
5929 27704 : out_array->children[i] = static_cast<struct ArrowArray *>(
5930 13852 : CPLCalloc(1, sizeof(struct ArrowArray)));
5931 40215 : if (!OGRCloneArrowArray(schema->children[i], src_array->children[i],
5932 13852 : out_array->children[i],
5933 13852 : IsFixedSizeList(format)
5934 1341 : ? nOffset * GetFixedSizeList(format)
5935 12511 : : IsStructure(format) ? nOffset
5936 : : 0))
5937 : {
5938 0 : bRet = false;
5939 0 : break;
5940 : }
5941 : }
5942 : }
5943 :
5944 14093 : if (bRet && src_array->dictionary)
5945 : {
5946 111 : out_array->dictionary = static_cast<struct ArrowArray *>(
5947 111 : CPLCalloc(1, sizeof(struct ArrowArray)));
5948 111 : bRet = OGRCloneArrowArray(schema->dictionary, src_array->dictionary,
5949 : out_array->dictionary, 0);
5950 : }
5951 :
5952 14093 : if (!bRet)
5953 : {
5954 0 : out_array->release(out_array);
5955 0 : memset(out_array, 0, sizeof(*out_array));
5956 : }
5957 14093 : return bRet;
5958 : }
5959 :
5960 : /** Full/deep copy of an array.
5961 : *
5962 : * Renormalize the offset of the array (and its children) to 0.
5963 : *
5964 : * In case of failure, out_array will be let in a released state.
5965 : *
5966 : * @param schema Schema of the array. Must *NOT* be NULL.
5967 : * @param src_array Source array. Must *NOT* be NULL.
5968 : * @param out_array Output array. Must *NOT* be NULL (but its content may be random)
5969 : * @return true if success.
5970 : */
5971 130 : bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5972 : const struct ArrowArray *src_array,
5973 : struct ArrowArray *out_array)
5974 : {
5975 130 : return OGRCloneArrowArray(schema, src_array, out_array, 0);
5976 : }
5977 :
5978 : /************************************************************************/
5979 : /* OGRCloneArrowMetadata() */
5980 : /************************************************************************/
5981 :
5982 23 : static void *OGRCloneArrowMetadata(const void *pMetadata)
5983 : {
5984 23 : if (!pMetadata)
5985 19 : return nullptr;
5986 4 : std::vector<GByte> abyOut;
5987 4 : const GByte *pabyMetadata = static_cast<const GByte *>(pMetadata);
5988 : int32_t nKVP;
5989 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + sizeof(int32_t));
5990 4 : memcpy(&nKVP, pabyMetadata, sizeof(int32_t));
5991 4 : pabyMetadata += sizeof(int32_t);
5992 8 : for (int i = 0; i < nKVP; ++i)
5993 : {
5994 : int32_t nSizeKey;
5995 0 : abyOut.insert(abyOut.end(), pabyMetadata,
5996 4 : pabyMetadata + sizeof(int32_t));
5997 4 : memcpy(&nSizeKey, pabyMetadata, sizeof(int32_t));
5998 4 : pabyMetadata += sizeof(int32_t);
5999 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeKey);
6000 4 : pabyMetadata += nSizeKey;
6001 :
6002 : int32_t nSizeValue;
6003 0 : abyOut.insert(abyOut.end(), pabyMetadata,
6004 4 : pabyMetadata + sizeof(int32_t));
6005 4 : memcpy(&nSizeValue, pabyMetadata, sizeof(int32_t));
6006 4 : pabyMetadata += sizeof(int32_t);
6007 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeValue);
6008 4 : pabyMetadata += nSizeValue;
6009 : }
6010 :
6011 4 : GByte *pabyOut = static_cast<GByte *>(VSI_MALLOC_VERBOSE(abyOut.size()));
6012 4 : if (pabyOut)
6013 4 : memcpy(pabyOut, abyOut.data(), abyOut.size());
6014 4 : return pabyOut;
6015 : }
6016 :
6017 : /************************************************************************/
6018 : /* OGRCloneArrowSchema() */
6019 : /************************************************************************/
6020 :
6021 : /** Full/deep copy of a schema.
6022 : *
6023 : * In case of failure, out_schema will be let in a released state.
6024 : *
6025 : * @param schema Schema to clone. Must *NOT* be NULL.
6026 : * @param out_schema Output schema. Must *NOT* be NULL (but its content may be random)
6027 : * @return true if success.
6028 : */
6029 23 : bool OGRCloneArrowSchema(const struct ArrowSchema *schema,
6030 : struct ArrowSchema *out_schema)
6031 : {
6032 23 : memset(out_schema, 0, sizeof(*out_schema));
6033 23 : out_schema->release = OGRLayerFullReleaseSchema;
6034 23 : out_schema->format = CPLStrdup(schema->format);
6035 23 : out_schema->name = CPLStrdup(schema->name);
6036 23 : out_schema->metadata = static_cast<const char *>(
6037 23 : const_cast<const void *>(OGRCloneArrowMetadata(schema->metadata)));
6038 23 : out_schema->flags = schema->flags;
6039 23 : if (schema->n_children)
6040 : {
6041 5 : out_schema->children =
6042 5 : static_cast<struct ArrowSchema **>(VSI_CALLOC_VERBOSE(
6043 : static_cast<int>(schema->n_children), sizeof(ArrowSchema *)));
6044 5 : if (!out_schema->children)
6045 : {
6046 0 : out_schema->release(out_schema);
6047 0 : return false;
6048 : }
6049 5 : out_schema->n_children = schema->n_children;
6050 23 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
6051 : {
6052 36 : out_schema->children[i] = static_cast<struct ArrowSchema *>(
6053 18 : CPLMalloc(sizeof(ArrowSchema)));
6054 18 : if (!OGRCloneArrowSchema(schema->children[i],
6055 18 : out_schema->children[i]))
6056 : {
6057 0 : out_schema->release(out_schema);
6058 0 : return false;
6059 : }
6060 : }
6061 : }
6062 23 : if (schema->dictionary)
6063 : {
6064 0 : out_schema->dictionary =
6065 0 : static_cast<struct ArrowSchema *>(CPLMalloc(sizeof(ArrowSchema)));
6066 0 : if (!OGRCloneArrowSchema(schema->dictionary, out_schema->dictionary))
6067 : {
6068 0 : out_schema->release(out_schema);
6069 0 : return false;
6070 : }
6071 : }
6072 23 : return true;
6073 : }
6074 :
6075 : /************************************************************************/
6076 : /* OGRLayer::IsArrowSchemaSupported() */
6077 : /************************************************************************/
6078 :
6079 : const struct
6080 : {
6081 : const char *arrowType;
6082 : OGRFieldType eType;
6083 : OGRFieldSubType eSubType;
6084 : } gasArrowTypesToOGR[] = {
6085 : {"b", OFTInteger, OFSTBoolean}, // Boolean
6086 : {"c", OFTInteger, OFSTInt16}, // Int8
6087 : {"C", OFTInteger, OFSTInt16}, // UInt8
6088 : {"s", OFTInteger, OFSTInt16}, // Int16
6089 : {"S", OFTInteger, OFSTNone}, // UInt16
6090 : {"i", OFTInteger, OFSTNone}, // Int32
6091 : {"I", OFTInteger64, OFSTNone}, // UInt32
6092 : {"l", OFTInteger64, OFSTNone}, // Int64
6093 : {"L", OFTReal,
6094 : OFSTNone}, // UInt64 (potentially lossy conversion if going through OGRFeature)
6095 : {"e", OFTReal, OFSTFloat32}, // float16
6096 : {"f", OFTReal, OFSTFloat32}, // float32
6097 : {"g", OFTReal, OFSTNone}, // float64
6098 : {"z", OFTBinary, OFSTNone}, // binary
6099 : {"Z", OFTBinary,
6100 : OFSTNone}, // large binary (will be limited to 32 bit length though if going through OGRFeature!)
6101 : {"u", OFTString, OFSTNone}, // string
6102 : {"U", OFTString, OFSTNone}, // large string
6103 : {"vu", OFTString, OFSTNone}, // string view
6104 : {"tdD", OFTDate, OFSTNone}, // date32[days]
6105 : {"tdm", OFTDate, OFSTNone}, // date64[milliseconds]
6106 : {"tts", OFTTime, OFSTNone}, // time32 [seconds]
6107 : {"ttm", OFTTime, OFSTNone}, // time32 [milliseconds]
6108 : {"ttu", OFTTime, OFSTNone}, // time64 [microseconds]
6109 : {"ttn", OFTTime, OFSTNone}, // time64 [nanoseconds]
6110 : };
6111 :
6112 : const struct
6113 : {
6114 : const char arrowLetter;
6115 : OGRFieldType eType;
6116 : OGRFieldSubType eSubType;
6117 : } gasListTypes[] = {
6118 : {ARROW_LETTER_BOOLEAN, OFTIntegerList, OFSTBoolean},
6119 : {ARROW_LETTER_INT8, OFTIntegerList, OFSTInt16},
6120 : {ARROW_LETTER_UINT8, OFTIntegerList, OFSTInt16},
6121 : {ARROW_LETTER_INT16, OFTIntegerList, OFSTInt16},
6122 : {ARROW_LETTER_UINT16, OFTIntegerList, OFSTNone},
6123 : {ARROW_LETTER_INT32, OFTIntegerList, OFSTNone},
6124 : {ARROW_LETTER_UINT32, OFTInteger64List, OFSTNone},
6125 : {ARROW_LETTER_INT64, OFTInteger64List, OFSTNone},
6126 : {ARROW_LETTER_UINT64, OFTRealList,
6127 : OFSTNone}, //(potentially lossy conversion if going through OGRFeature)
6128 : {ARROW_LETTER_FLOAT16, OFTRealList, OFSTFloat32},
6129 : {ARROW_LETTER_FLOAT32, OFTRealList, OFSTFloat32},
6130 : {ARROW_LETTER_FLOAT64, OFTRealList, OFSTNone},
6131 : {ARROW_LETTER_STRING, OFTStringList, OFSTNone},
6132 : {ARROW_LETTER_LARGE_STRING, OFTStringList, OFSTNone},
6133 : };
6134 :
6135 43 : static inline bool IsValidDictionaryIndexType(const char *format)
6136 : {
6137 40 : return (format[0] == ARROW_LETTER_INT8 || format[0] == ARROW_LETTER_UINT8 ||
6138 37 : format[0] == ARROW_LETTER_INT16 ||
6139 34 : format[0] == ARROW_LETTER_UINT16 ||
6140 31 : format[0] == ARROW_LETTER_INT32 ||
6141 9 : format[0] == ARROW_LETTER_UINT32 ||
6142 6 : format[0] == ARROW_LETTER_INT64 ||
6143 89 : format[0] == ARROW_LETTER_UINT64) &&
6144 86 : format[1] == 0;
6145 : }
6146 :
6147 230 : static bool IsSupportForJSONObj(const struct ArrowSchema *schema)
6148 : {
6149 230 : const char *format = schema->format;
6150 230 : if (IsStructure(format))
6151 : {
6152 35 : for (int64_t i = 0; i < schema->n_children; ++i)
6153 : {
6154 26 : if (!IsSupportForJSONObj(schema->children[i]))
6155 0 : return false;
6156 : }
6157 9 : return true;
6158 : }
6159 :
6160 2752 : for (const auto &sType : gasListTypes)
6161 : {
6162 2626 : if (format[0] == sType.arrowLetter && format[1] == 0)
6163 : {
6164 95 : return true;
6165 : }
6166 : }
6167 :
6168 366 : if (IsBinary(format) || IsLargeBinary(format) ||
6169 366 : IsFixedWidthBinary(format) || IsStringView(format))
6170 12 : return true;
6171 :
6172 114 : if (IsDecimal(format))
6173 : {
6174 6 : int nPrecision = 0;
6175 6 : int nScale = 0;
6176 6 : int nWidthInBytes = 0;
6177 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6178 : {
6179 0 : CPLError(CE_Failure, CPLE_AppDefined, "Invalid field format %s",
6180 : format);
6181 0 : return false;
6182 : }
6183 :
6184 6 : return GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision) ==
6185 6 : nullptr;
6186 : }
6187 :
6188 108 : if (IsMap(format))
6189 : {
6190 74 : return IsStructure(schema->children[0]->format) &&
6191 148 : schema->children[0]->n_children == 2 &&
6192 222 : IsString(schema->children[0]->children[0]->format) &&
6193 148 : IsSupportForJSONObj(schema->children[0]->children[1]);
6194 : }
6195 :
6196 34 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6197 : {
6198 34 : return IsSupportForJSONObj(schema->children[0]);
6199 : }
6200 :
6201 0 : return false;
6202 : }
6203 :
6204 544 : static bool IsArrowSchemaSupportedInternal(const struct ArrowSchema *schema,
6205 : const std::string &osFieldPrefix,
6206 : std::string &osErrorMsg)
6207 : {
6208 0 : const auto AppendError = [&osErrorMsg](const std::string &osMsg)
6209 : {
6210 0 : if (!osErrorMsg.empty())
6211 0 : osErrorMsg += " ";
6212 0 : osErrorMsg += osMsg;
6213 544 : };
6214 :
6215 544 : const char *fieldName = schema->name;
6216 544 : const char *format = schema->format;
6217 544 : if (IsStructure(format))
6218 : {
6219 5 : bool bRet = true;
6220 5 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6221 21 : for (int64_t i = 0; i < schema->n_children; ++i)
6222 : {
6223 16 : if (!IsArrowSchemaSupportedInternal(schema->children[i],
6224 : osNewPrefix, osErrorMsg))
6225 0 : bRet = false;
6226 : }
6227 5 : return bRet;
6228 : }
6229 :
6230 539 : if (schema->dictionary)
6231 : {
6232 15 : if (!IsValidDictionaryIndexType(format))
6233 : {
6234 0 : AppendError("Dictionary only supported if the parent is of "
6235 : "type [U]Int[8|16|32|64]");
6236 0 : return false;
6237 : }
6238 :
6239 15 : schema = schema->dictionary;
6240 15 : format = schema->format;
6241 : }
6242 :
6243 539 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6244 : {
6245 : // Only some subtypes supported
6246 132 : const char *childFormat = schema->children[0]->format;
6247 1103 : for (const auto &sType : gasListTypes)
6248 : {
6249 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
6250 : {
6251 117 : return true;
6252 : }
6253 : }
6254 15 : if (IsStringView(childFormat))
6255 0 : return true;
6256 :
6257 15 : if (IsDecimal(childFormat))
6258 : {
6259 7 : int nPrecision = 0;
6260 7 : int nScale = 0;
6261 7 : int nWidthInBytes = 0;
6262 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
6263 : nWidthInBytes))
6264 : {
6265 0 : AppendError(std::string("Invalid field format ") + childFormat +
6266 0 : " for field " + osFieldPrefix + fieldName);
6267 0 : return false;
6268 : }
6269 :
6270 : const char *pszError =
6271 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6272 7 : if (pszError)
6273 : {
6274 0 : AppendError(pszError);
6275 0 : return false;
6276 : }
6277 :
6278 7 : return true;
6279 : }
6280 :
6281 8 : if (IsSupportForJSONObj(schema))
6282 : {
6283 8 : return true;
6284 : }
6285 :
6286 0 : AppendError("Type list for field " + osFieldPrefix + fieldName +
6287 : " is not supported.");
6288 0 : return false;
6289 : }
6290 :
6291 407 : else if (IsMap(format))
6292 : {
6293 70 : if (IsSupportForJSONObj(schema))
6294 70 : return true;
6295 :
6296 0 : AppendError("Type map for field " + osFieldPrefix + fieldName +
6297 : " is not supported.");
6298 0 : return false;
6299 : }
6300 337 : else if (IsDecimal(format))
6301 : {
6302 6 : int nPrecision = 0;
6303 6 : int nScale = 0;
6304 6 : int nWidthInBytes = 0;
6305 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6306 : {
6307 0 : AppendError(std::string("Invalid field format ") + format +
6308 0 : " for field " + osFieldPrefix + fieldName);
6309 0 : return false;
6310 : }
6311 :
6312 : const char *pszError =
6313 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6314 6 : if (pszError)
6315 : {
6316 0 : AppendError(pszError);
6317 0 : return false;
6318 : }
6319 :
6320 6 : return true;
6321 : }
6322 : else
6323 : {
6324 4277 : for (const auto &sType : gasArrowTypesToOGR)
6325 : {
6326 4257 : if (strcmp(format, sType.arrowType) == 0)
6327 : {
6328 311 : return true;
6329 : }
6330 : }
6331 :
6332 20 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
6333 20 : return true;
6334 :
6335 0 : AppendError("Type '" + std::string(format) + "' for field " +
6336 0 : osFieldPrefix + fieldName + " is not supported.");
6337 0 : return false;
6338 : }
6339 : }
6340 :
6341 : /** Returns whether the provided ArrowSchema is supported for writing.
6342 : *
6343 : * This method exists since not all drivers may support all Arrow data types.
6344 : *
6345 : * The ArrowSchema must be of type struct (format=+s)
6346 : *
6347 : * It is recommended to call this method before calling WriteArrowBatch().
6348 : *
6349 : * This is the same as the C function OGR_L_IsArrowSchemaSupported().
6350 : *
6351 : * @param schema Schema of type struct (format = '+s')
6352 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6353 : * @param[out] osErrorMsg Reason of the failure, when this method returns false.
6354 : * @return true if the ArrowSchema is supported for writing.
6355 : * @since 3.8
6356 : */
6357 59 : bool OGRLayer::IsArrowSchemaSupported(const struct ArrowSchema *schema,
6358 : CPL_UNUSED CSLConstList papszOptions,
6359 : std::string &osErrorMsg) const
6360 : {
6361 59 : if (!IsStructure(schema->format))
6362 : {
6363 : osErrorMsg =
6364 : "IsArrowSchemaSupported() should be called on a schema that is a "
6365 1 : "struct of fields";
6366 1 : return false;
6367 : }
6368 :
6369 58 : bool bRet = true;
6370 586 : for (int64_t i = 0; i < schema->n_children; ++i)
6371 : {
6372 528 : if (!IsArrowSchemaSupportedInternal(schema->children[i], std::string(),
6373 : osErrorMsg))
6374 0 : bRet = false;
6375 : }
6376 58 : return bRet;
6377 : }
6378 :
6379 : /************************************************************************/
6380 : /* OGR_L_IsArrowSchemaSupported() */
6381 : /************************************************************************/
6382 :
6383 : /** Returns whether the provided ArrowSchema is supported for writing.
6384 : *
6385 : * This function exists since not all drivers may support all Arrow data types.
6386 : *
6387 : * The ArrowSchema must be of type struct (format=+s)
6388 : *
6389 : * It is recommended to call this function before calling OGR_L_WriteArrowBatch().
6390 : *
6391 : * This is the same as the C++ method OGRLayer::IsArrowSchemaSupported().
6392 : *
6393 : * @param hLayer Layer.
6394 : * @param schema Schema of type struct (format = '+s')
6395 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6396 : * @param[out] ppszErrorMsg nullptr, or pointer to a string that will contain
6397 : * the reason of the failure, when this function returns false.
6398 : * @return true if the ArrowSchema is supported for writing.
6399 : * @since 3.8
6400 : */
6401 19 : bool OGR_L_IsArrowSchemaSupported(OGRLayerH hLayer,
6402 : const struct ArrowSchema *schema,
6403 : CSLConstList papszOptions,
6404 : char **ppszErrorMsg)
6405 : {
6406 19 : VALIDATE_POINTER1(hLayer, __func__, false);
6407 19 : VALIDATE_POINTER1(schema, __func__, false);
6408 :
6409 38 : std::string osErrorMsg;
6410 38 : if (!OGRLayer::FromHandle(hLayer)->IsArrowSchemaSupported(
6411 19 : schema, papszOptions, osErrorMsg))
6412 : {
6413 4 : if (ppszErrorMsg)
6414 4 : *ppszErrorMsg = VSIStrdup(osErrorMsg.c_str());
6415 4 : return false;
6416 : }
6417 : else
6418 : {
6419 15 : if (ppszErrorMsg)
6420 15 : *ppszErrorMsg = nullptr;
6421 15 : return true;
6422 : }
6423 : }
6424 :
6425 : /************************************************************************/
6426 : /* IsKnownCodedFieldDomain() */
6427 : /************************************************************************/
6428 :
6429 34 : static bool IsKnownCodedFieldDomain(OGRLayer *poLayer,
6430 : const char *arrowMetadata)
6431 : {
6432 34 : if (arrowMetadata)
6433 : {
6434 6 : const auto oMetadata = OGRParseArrowMetadata(arrowMetadata);
6435 6 : for (const auto &oIter : oMetadata)
6436 : {
6437 6 : if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6438 : {
6439 6 : auto poDS = poLayer->GetDataset();
6440 6 : if (poDS)
6441 : {
6442 : const auto poFieldDomain =
6443 6 : poDS->GetFieldDomain(oIter.second);
6444 12 : if (poFieldDomain &&
6445 6 : poFieldDomain->GetDomainType() == OFDT_CODED)
6446 : {
6447 6 : return true;
6448 : }
6449 : }
6450 : }
6451 : }
6452 : }
6453 28 : return false;
6454 : }
6455 :
6456 : /************************************************************************/
6457 : /* OGRLayer::CreateFieldFromArrowSchema() */
6458 : /************************************************************************/
6459 :
6460 : //! @cond Doxygen_Suppress
6461 472 : bool OGRLayer::CreateFieldFromArrowSchemaInternal(
6462 : const struct ArrowSchema *schema, const std::string &osFieldPrefix,
6463 : CSLConstList papszOptions)
6464 : {
6465 472 : const char *fieldName = schema->name;
6466 472 : const char *format = schema->format;
6467 472 : if (IsStructure(format))
6468 : {
6469 5 : if (IsArrowTimeStampWithOffsetField(schema))
6470 : {
6471 0 : OGRFieldDefn oFieldDefn((osFieldPrefix + fieldName).c_str(),
6472 0 : OFTDateTime);
6473 0 : oFieldDefn.SetTZFlag(OGR_TZFLAG_MIXED_TZ);
6474 0 : auto poLayerDefn = GetLayerDefn();
6475 0 : const int nFieldCountBefore = poLayerDefn->GetFieldCount();
6476 0 : if (CreateField(&oFieldDefn) != OGRERR_NONE ||
6477 0 : nFieldCountBefore + 1 != poLayerDefn->GetFieldCount())
6478 : {
6479 0 : return false;
6480 : }
6481 : }
6482 : else
6483 : {
6484 5 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6485 21 : for (int64_t i = 0; i < schema->n_children; ++i)
6486 : {
6487 16 : if (!CreateFieldFromArrowSchemaInternal(
6488 16 : schema->children[i], osNewPrefix, papszOptions))
6489 0 : return false;
6490 : }
6491 : }
6492 5 : return true;
6493 : }
6494 :
6495 934 : CPLStringList aosNativeTypes;
6496 467 : auto poLayer = const_cast<OGRLayer *>(this);
6497 467 : auto poDS = poLayer->GetDataset();
6498 467 : if (poDS)
6499 : {
6500 467 : auto poDriver = poDS->GetDriver();
6501 467 : if (poDriver)
6502 : {
6503 : const char *pszMetadataItem =
6504 467 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
6505 467 : if (pszMetadataItem)
6506 467 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
6507 : }
6508 : }
6509 :
6510 482 : if (schema->dictionary &&
6511 15 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6512 : {
6513 13 : if (!IsValidDictionaryIndexType(format))
6514 : {
6515 0 : CPLError(CE_Failure, CPLE_NotSupported,
6516 : "Dictionary only supported if the parent is of "
6517 : "type [U]Int[8|16|32|64]");
6518 0 : return false;
6519 : }
6520 :
6521 13 : schema = schema->dictionary;
6522 13 : format = schema->format;
6523 : }
6524 :
6525 467 : const auto AddField = [this, schema, fieldName, &aosNativeTypes,
6526 : &osFieldPrefix, poDS](OGRFieldType eTypeIn,
6527 : OGRFieldSubType eSubTypeIn,
6528 3305 : int nWidth, int nPrecision)
6529 : {
6530 467 : const char *pszTypeName = OGRFieldDefn::GetFieldTypeName(eTypeIn);
6531 467 : auto eTypeOut = eTypeIn;
6532 467 : auto eSubTypeOut = eSubTypeIn;
6533 934 : if (!aosNativeTypes.empty() &&
6534 467 : aosNativeTypes.FindString(pszTypeName) < 0)
6535 : {
6536 20 : eTypeOut = OFTString;
6537 20 : eSubTypeOut =
6538 15 : (eTypeIn == OFTIntegerList || eTypeIn == OFTInteger64List ||
6539 8 : eTypeIn == OFTRealList || eTypeIn == OFTStringList)
6540 35 : ? OFSTJSON
6541 : : OFSTNone;
6542 : }
6543 :
6544 934 : const std::string osWantedOGRFieldName = osFieldPrefix + fieldName;
6545 934 : OGRFieldDefn oFieldDefn(osWantedOGRFieldName.c_str(), eTypeOut);
6546 467 : oFieldDefn.SetSubType(eSubTypeOut);
6547 467 : if (eTypeOut == eTypeIn && eSubTypeOut == eSubTypeIn)
6548 : {
6549 447 : oFieldDefn.SetWidth(nWidth);
6550 447 : oFieldDefn.SetPrecision(nPrecision);
6551 : }
6552 467 : oFieldDefn.SetNullable((schema->flags & ARROW_FLAG_NULLABLE) != 0);
6553 :
6554 467 : if (schema->metadata)
6555 : {
6556 62 : const auto oMetadata = OGRParseArrowMetadata(schema->metadata);
6557 63 : for (const auto &oIter : oMetadata)
6558 : {
6559 32 : if (oIter.first == MD_GDAL_OGR_TYPE)
6560 : {
6561 4 : const auto &osType = oIter.second;
6562 48 : for (auto eType = OFTInteger; eType <= OFTMaxType;)
6563 : {
6564 48 : if (OGRFieldDefn::GetFieldTypeName(eType) == osType)
6565 : {
6566 4 : oFieldDefn.SetType(eType);
6567 4 : break;
6568 : }
6569 44 : if (eType == OFTMaxType)
6570 0 : break;
6571 : else
6572 44 : eType = static_cast<OGRFieldType>(eType + 1);
6573 : }
6574 : }
6575 28 : else if (oIter.first == MD_GDAL_OGR_ALTERNATIVE_NAME)
6576 2 : oFieldDefn.SetAlternativeName(oIter.second.c_str());
6577 26 : else if (oIter.first == MD_GDAL_OGR_COMMENT)
6578 2 : oFieldDefn.SetComment(oIter.second);
6579 24 : else if (oIter.first == MD_GDAL_OGR_DEFAULT)
6580 2 : oFieldDefn.SetDefault(oIter.second.c_str());
6581 22 : else if (oIter.first == MD_GDAL_OGR_SUBTYPE)
6582 : {
6583 5 : if (eTypeIn == eTypeOut)
6584 : {
6585 4 : const auto &osSubType = oIter.second;
6586 4 : for (auto eSubType = OFSTNone;
6587 15 : eSubType <= OFSTMaxSubType;)
6588 : {
6589 15 : if (OGRFieldDefn::GetFieldSubTypeName(eSubType) ==
6590 : osSubType)
6591 : {
6592 4 : oFieldDefn.SetSubType(eSubType);
6593 4 : break;
6594 : }
6595 11 : if (eSubType == OFSTMaxSubType)
6596 0 : break;
6597 : else
6598 11 : eSubType =
6599 11 : static_cast<OGRFieldSubType>(eSubType + 1);
6600 : }
6601 : }
6602 : }
6603 17 : else if (oIter.first == MD_GDAL_OGR_WIDTH)
6604 6 : oFieldDefn.SetWidth(atoi(oIter.second.c_str()));
6605 11 : else if (oIter.first == MD_GDAL_OGR_UNIQUE)
6606 2 : oFieldDefn.SetUnique(oIter.second == "true");
6607 9 : else if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6608 : {
6609 2 : if (poDS && poDS->GetFieldDomain(oIter.second))
6610 2 : oFieldDefn.SetDomainName(oIter.second);
6611 : }
6612 13 : else if (oIter.first == ARROW_EXTENSION_NAME_KEY &&
6613 6 : (oIter.second == EXTENSION_NAME_ARROW_JSON ||
6614 : // Used by BigQuery through ADBC driver
6615 0 : oIter.second == "google:sqlType:json"))
6616 : {
6617 6 : oFieldDefn.SetSubType(OFSTJSON);
6618 : }
6619 1 : else if (oIter.first == ARROW_EXTENSION_NAME_KEY)
6620 : {
6621 0 : CPLDebug("OGR", "Unknown Arrow extension: %s",
6622 : oIter.second.c_str());
6623 : }
6624 : else
6625 : {
6626 1 : CPLDebug("OGR", "Unknown field metadata: %s",
6627 : oIter.first.c_str());
6628 : }
6629 : }
6630 : }
6631 467 : auto poLayerDefn = GetLayerDefn();
6632 467 : const int nFieldCountBefore = poLayerDefn->GetFieldCount();
6633 934 : if (CreateField(&oFieldDefn) != OGRERR_NONE ||
6634 467 : nFieldCountBefore + 1 != poLayerDefn->GetFieldCount())
6635 : {
6636 0 : return false;
6637 : }
6638 : const char *pszActualFieldName =
6639 467 : poLayerDefn->GetFieldDefn(nFieldCountBefore)->GetNameRef();
6640 467 : if (pszActualFieldName != osWantedOGRFieldName)
6641 : {
6642 : m_poPrivate
6643 1 : ->m_oMapArrowFieldNameToOGRFieldName[osWantedOGRFieldName] =
6644 1 : pszActualFieldName;
6645 : }
6646 467 : return true;
6647 467 : };
6648 :
6649 8437 : for (const auto &sType : gasArrowTypesToOGR)
6650 : {
6651 8206 : if (strcmp(format, sType.arrowType) == 0)
6652 : {
6653 236 : return AddField(sType.eType, sType.eSubType, 0, 0);
6654 : }
6655 : }
6656 :
6657 231 : if (IsMap(format))
6658 : {
6659 70 : return AddField(OFTString, OFSTJSON, 0, 0);
6660 : }
6661 :
6662 161 : if (IsTimestamp(format))
6663 : {
6664 20 : return AddField(OFTDateTime, OFSTNone, 0, 0);
6665 : }
6666 :
6667 141 : if (IsFixedWidthBinary(format))
6668 : {
6669 3 : return AddField(OFTBinary, OFSTNone, GetFixedWithBinary(format), 0);
6670 : }
6671 :
6672 138 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6673 : {
6674 132 : const char *childFormat = schema->children[0]->format;
6675 1103 : for (const auto &sType : gasListTypes)
6676 : {
6677 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
6678 : {
6679 117 : return AddField(sType.eType, sType.eSubType, 0, 0);
6680 : }
6681 : }
6682 :
6683 15 : if (IsStringView(childFormat))
6684 : {
6685 0 : return AddField(OFTStringList, OFSTNone, 0, 0);
6686 : }
6687 :
6688 15 : if (IsDecimal(childFormat))
6689 : {
6690 7 : int nPrecision = 0;
6691 7 : int nScale = 0;
6692 7 : int nWidthInBytes = 0;
6693 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
6694 : nWidthInBytes))
6695 : {
6696 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6697 0 : (std::string("Invalid field format ") + format +
6698 0 : " for field " + osFieldPrefix + fieldName)
6699 : .c_str());
6700 0 : return false;
6701 : }
6702 :
6703 : const char *pszError =
6704 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6705 7 : if (pszError)
6706 : {
6707 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6708 0 : return false;
6709 : }
6710 :
6711 : // DBF convention: add space for negative sign and decimal separator
6712 7 : return AddField(OFTRealList, OFSTNone, nPrecision + 2, nScale);
6713 : }
6714 :
6715 8 : if (IsSupportForJSONObj(schema->children[0]))
6716 : {
6717 8 : return AddField(OFTString, OFSTJSON, 0, 0);
6718 : }
6719 :
6720 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6721 0 : ("List of type '" + std::string(childFormat) + "' for field " +
6722 0 : osFieldPrefix + fieldName + " is not supported.")
6723 : .c_str());
6724 0 : return false;
6725 : }
6726 :
6727 6 : if (IsDecimal(format))
6728 : {
6729 6 : int nPrecision = 0;
6730 6 : int nScale = 0;
6731 6 : int nWidthInBytes = 0;
6732 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6733 : {
6734 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6735 0 : (std::string("Invalid field format ") + format +
6736 0 : " for field " + osFieldPrefix + fieldName)
6737 : .c_str());
6738 0 : return false;
6739 : }
6740 :
6741 : const char *pszError =
6742 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6743 6 : if (pszError)
6744 : {
6745 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6746 0 : return false;
6747 : }
6748 :
6749 : // DBF convention: add space for negative sign and decimal separator
6750 6 : return AddField(OFTReal, OFSTNone, nPrecision + 2, nScale);
6751 : }
6752 :
6753 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6754 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
6755 0 : fieldName + " is not supported.")
6756 : .c_str());
6757 0 : return false;
6758 : }
6759 :
6760 : //! @endcond
6761 :
6762 : /** Creates a field from an ArrowSchema.
6763 : *
6764 : * This should only be used for attribute fields. Geometry fields should
6765 : * be created with CreateGeomField(). The FID field should also not be
6766 : * passed with this method.
6767 : *
6768 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6769 : * passed schema must be for an individual field, and thus, is *not* of type
6770 : * struct (format=+s) (unless writing a set of fields grouped together in the
6771 : * same structure).
6772 : *
6773 : * Additional field metadata can be specified through the ArrowSchema::metadata
6774 : * field with the potential following items:
6775 : * <ul>
6776 : * <li>"GDAL:OGR:alternative_name": value of
6777 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6778 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6779 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6780 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6781 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6782 : * string)</li>
6783 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6784 : * "true" or "false")</li>
6785 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6786 : * </ul>
6787 : *
6788 : * This method and CreateField() are mutually exclusive in the same session.
6789 : *
6790 : * This method is the same as the C function OGR_L_CreateFieldFromArrowSchema().
6791 : *
6792 : * @param schema Schema of the field to create.
6793 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6794 : * @return true in case of success
6795 : * @since 3.8
6796 : */
6797 456 : bool OGRLayer::CreateFieldFromArrowSchema(const struct ArrowSchema *schema,
6798 : CSLConstList papszOptions)
6799 : {
6800 912 : return CreateFieldFromArrowSchemaInternal(schema, std::string(),
6801 912 : papszOptions);
6802 : }
6803 :
6804 : /************************************************************************/
6805 : /* OGR_L_CreateFieldFromArrowSchema() */
6806 : /************************************************************************/
6807 :
6808 : /** Creates a field from an ArrowSchema.
6809 : *
6810 : * This should only be used for attribute fields. Geometry fields should
6811 : * be created with CreateGeomField(). The FID field should also not be
6812 : * passed with this method.
6813 : *
6814 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6815 : * passed schema must be for an individual field, and thus, is *not* of type
6816 : * struct (format=+s) (unless writing a set of fields grouped together in the
6817 : * same structure).
6818 : *
6819 : * Additional field metadata can be specified through the ArrowSchema::metadata
6820 : * field with the potential following items:
6821 : * <ul>
6822 : * <li>"GDAL:OGR:alternative_name": value of
6823 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6824 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6825 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6826 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6827 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6828 : * string)</li>
6829 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6830 : * "true" or "false")</li>
6831 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6832 : * </ul>
6833 : *
6834 : * This method and CreateField() are mutually exclusive in the same session.
6835 : *
6836 : * This method is the same as the C++ method OGRLayer::CreateFieldFromArrowSchema().
6837 : *
6838 : * @param hLayer Layer.
6839 : * @param schema Schema of the field to create.
6840 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6841 : * @return true in case of success
6842 : * @since 3.8
6843 : */
6844 541 : bool OGR_L_CreateFieldFromArrowSchema(OGRLayerH hLayer,
6845 : const struct ArrowSchema *schema,
6846 : CSLConstList papszOptions)
6847 : {
6848 541 : VALIDATE_POINTER1(hLayer, __func__, false);
6849 541 : VALIDATE_POINTER1(schema, __func__, false);
6850 :
6851 1082 : return OGRLayer::FromHandle(hLayer)->CreateFieldFromArrowSchema(
6852 541 : schema, papszOptions);
6853 : }
6854 :
6855 : /************************************************************************/
6856 : /* BuildOGRFieldInfo() */
6857 : /************************************************************************/
6858 :
6859 : constexpr int FID_COLUMN_SPECIAL_OGR_FIELD_IDX = -2;
6860 :
6861 : struct FieldInfo
6862 : {
6863 : std::string osName{};
6864 : int iOGRFieldIdx = -1;
6865 : const char *format = nullptr;
6866 : OGRFieldType eNominalFieldType =
6867 : OFTMaxType; // OGR data type that would best match the Arrow type
6868 : OGRFieldType eTargetFieldType =
6869 : OFTMaxType; // actual OGR data type of the layer field
6870 : // OGR data type of the feature passed to FillFeature()
6871 : OGRFieldType eSetFeatureFieldType = OFTMaxType;
6872 : bool bIsGeomCol = false;
6873 : bool bUseDictionary = false;
6874 : bool bUseStringOptim = false;
6875 : int nWidthInBytes = 0; // only used for decimal fields
6876 : int nPrecision = 0; // only used for decimal fields
6877 : int nScale = 0; // only used for decimal fields
6878 : };
6879 :
6880 779 : static bool BuildOGRFieldInfo(
6881 : const struct ArrowSchema *schema, struct ArrowArray *array,
6882 : const OGRFeatureDefn *poFeatureDefn, const std::string &osFieldPrefix,
6883 : const CPLStringList &aosNativeTypes, bool &bFallbackTypesUsed,
6884 : std::vector<FieldInfo> &asFieldInfo, const char *pszFIDName,
6885 : const char *pszGeomFieldName, OGRLayer *poLayer,
6886 : const std::map<std::string, std::string> &oMapArrowFieldNameToOGRFieldName,
6887 : const struct ArrowSchema *&schemaFIDColumn,
6888 : struct ArrowArray *&arrayFIDColumn)
6889 : {
6890 779 : const char *fieldName = schema->name;
6891 779 : const char *format = schema->format;
6892 779 : if (IsStructure(format))
6893 : {
6894 9 : if (IsArrowTimeStampWithOffsetField(schema))
6895 : {
6896 0 : FieldInfo sInfo;
6897 0 : sInfo.osName = fieldName;
6898 0 : sInfo.format = "+s";
6899 0 : sInfo.eNominalFieldType = OFTDateTime;
6900 : const std::string &osExpectedOGRFieldName =
6901 0 : [&oMapArrowFieldNameToOGRFieldName,
6902 0 : &sInfo]() -> const std::string &
6903 : {
6904 : const auto oIter =
6905 0 : oMapArrowFieldNameToOGRFieldName.find(sInfo.osName);
6906 0 : if (oIter != oMapArrowFieldNameToOGRFieldName.end())
6907 0 : return oIter->second;
6908 0 : return sInfo.osName;
6909 0 : }();
6910 0 : sInfo.iOGRFieldIdx =
6911 0 : poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str());
6912 0 : if (sInfo.iOGRFieldIdx >= 0)
6913 : {
6914 : const auto eOGRType =
6915 0 : poFeatureDefn->GetFieldDefn(sInfo.iOGRFieldIdx)->GetType();
6916 0 : sInfo.eTargetFieldType = eOGRType;
6917 : }
6918 0 : asFieldInfo.emplace_back(std::move(sInfo));
6919 : }
6920 : else
6921 : {
6922 9 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6923 39 : for (int64_t i = 0; i < array->n_children; ++i)
6924 : {
6925 30 : if (!BuildOGRFieldInfo(
6926 30 : schema->children[i], array->children[i], poFeatureDefn,
6927 : osNewPrefix, aosNativeTypes, bFallbackTypesUsed,
6928 : asFieldInfo, pszFIDName, pszGeomFieldName, poLayer,
6929 : oMapArrowFieldNameToOGRFieldName, schemaFIDColumn,
6930 : arrayFIDColumn))
6931 : {
6932 0 : return false;
6933 : }
6934 : }
6935 : }
6936 9 : return true;
6937 : }
6938 :
6939 1540 : FieldInfo sInfo;
6940 :
6941 789 : if (schema->dictionary &&
6942 19 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6943 : {
6944 15 : if (!IsValidDictionaryIndexType(format))
6945 : {
6946 0 : CPLError(CE_Failure, CPLE_NotSupported,
6947 : "Dictionary only supported if the parent is of "
6948 : "type [U]Int[8|16|32|64]");
6949 0 : return false;
6950 : }
6951 :
6952 15 : sInfo.bUseDictionary = true;
6953 15 : schema = schema->dictionary;
6954 15 : format = schema->format;
6955 15 : array = array->dictionary;
6956 : }
6957 :
6958 770 : sInfo.osName = osFieldPrefix + fieldName;
6959 770 : sInfo.format = format;
6960 770 : if (pszFIDName && sInfo.osName == pszFIDName)
6961 : {
6962 35 : if (IsInt32(format) || IsInt64(format))
6963 : {
6964 34 : sInfo.iOGRFieldIdx = FID_COLUMN_SPECIAL_OGR_FIELD_IDX;
6965 34 : schemaFIDColumn = schema;
6966 34 : arrayFIDColumn = array;
6967 : }
6968 : else
6969 : {
6970 1 : CPLError(CE_Failure, CPLE_AppDefined,
6971 : "FID column '%s' should be of Arrow format 'i' "
6972 : "(int32) or 'l' (int64)",
6973 : sInfo.osName.c_str());
6974 1 : return false;
6975 : }
6976 : }
6977 : else
6978 : {
6979 : const std::string &osExpectedOGRFieldName =
6980 2204 : [&oMapArrowFieldNameToOGRFieldName, &sInfo]() -> const std::string &
6981 : {
6982 : const auto oIter =
6983 735 : oMapArrowFieldNameToOGRFieldName.find(sInfo.osName);
6984 735 : if (oIter != oMapArrowFieldNameToOGRFieldName.end())
6985 1 : return oIter->second;
6986 734 : return sInfo.osName;
6987 735 : }();
6988 735 : sInfo.iOGRFieldIdx =
6989 735 : poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str());
6990 735 : if (sInfo.iOGRFieldIdx >= 0)
6991 : {
6992 655 : bool bTypeOK = false;
6993 : const auto eOGRType =
6994 655 : poFeatureDefn->GetFieldDefn(sInfo.iOGRFieldIdx)->GetType();
6995 655 : sInfo.eTargetFieldType = eOGRType;
6996 12083 : for (const auto &sType : gasArrowTypesToOGR)
6997 : {
6998 11740 : if (strcmp(format, sType.arrowType) == 0)
6999 : {
7000 312 : sInfo.bUseStringOptim = sType.eType == OFTString;
7001 312 : sInfo.eNominalFieldType = sType.eType;
7002 312 : if (eOGRType == sInfo.eNominalFieldType)
7003 : {
7004 281 : bTypeOK = true;
7005 281 : break;
7006 : }
7007 31 : else if (eOGRType == OFTString)
7008 : {
7009 4 : bFallbackTypesUsed = true;
7010 4 : bTypeOK = true;
7011 4 : break;
7012 : }
7013 27 : else if (eOGRType == OFTInteger &&
7014 10 : sType.eType == OFTInteger64)
7015 : {
7016 : // Potentially lossy.
7017 4 : CPLDebug("OGR",
7018 : "For field %s, writing from Arrow array of "
7019 : "type Int64 into OGR Int32 field. "
7020 : "Potentially loss conversion can happen",
7021 : sInfo.osName.c_str());
7022 4 : bFallbackTypesUsed = true;
7023 4 : bTypeOK = true;
7024 4 : break;
7025 : }
7026 23 : else if (eOGRType == OFTInteger && sType.eType == OFTReal)
7027 : {
7028 : // Potentially lossy.
7029 6 : CPLDebug("OGR",
7030 : "For field %s, writing from Arrow array of "
7031 : "type Real into OGR Int32 field. "
7032 : "Potentially loss conversion can happen",
7033 : sInfo.osName.c_str());
7034 6 : bFallbackTypesUsed = true;
7035 6 : bTypeOK = true;
7036 6 : break;
7037 : }
7038 17 : else if (eOGRType == OFTInteger64 && sType.eType == OFTReal)
7039 : {
7040 : // Potentially lossy.
7041 6 : CPLDebug("OGR",
7042 : "For field %s, writing from Arrow array of "
7043 : "type Real into OGR Int64 field. "
7044 : "Potentially loss conversion can happen",
7045 : sInfo.osName.c_str());
7046 6 : bFallbackTypesUsed = true;
7047 6 : bTypeOK = true;
7048 6 : break;
7049 : }
7050 11 : else if (eOGRType == OFTReal && sType.eType == OFTInteger64)
7051 : {
7052 : // Potentially lossy.
7053 4 : CPLDebug("OGR",
7054 : "For field %s, writing from Arrow array of "
7055 : "type Int64 into OGR Real field. "
7056 : "Potentially loss conversion can happen",
7057 : sInfo.osName.c_str());
7058 4 : bFallbackTypesUsed = true;
7059 4 : bTypeOK = true;
7060 4 : break;
7061 : }
7062 7 : else if ((eOGRType == OFTInteger64 ||
7063 4 : eOGRType == OFTReal) &&
7064 4 : sType.eType == OFTInteger)
7065 : {
7066 : // Non-lossy
7067 4 : bFallbackTypesUsed = true;
7068 4 : bTypeOK = true;
7069 4 : break;
7070 : }
7071 3 : else if (eOGRType == OFTDateTime &&
7072 3 : sType.eType == OFTString)
7073 : {
7074 3 : bFallbackTypesUsed = true;
7075 3 : bTypeOK = true;
7076 3 : break;
7077 : }
7078 : else
7079 : {
7080 0 : CPLError(CE_Failure, CPLE_AppDefined,
7081 : "For field %s, OGR field type is %s whereas "
7082 : "Arrow type implies %s",
7083 : sInfo.osName.c_str(),
7084 : OGR_GetFieldTypeName(eOGRType),
7085 0 : OGR_GetFieldTypeName(sType.eType));
7086 0 : return false;
7087 : }
7088 : }
7089 : }
7090 :
7091 655 : if (!bTypeOK && IsMap(format))
7092 : {
7093 106 : sInfo.eNominalFieldType = OFTString;
7094 106 : if (eOGRType == sInfo.eNominalFieldType)
7095 : {
7096 106 : bTypeOK = true;
7097 : }
7098 : else
7099 : {
7100 0 : CPLError(CE_Failure, CPLE_AppDefined,
7101 : "For field %s, OGR field type is %s whereas "
7102 : "Arrow type implies %s",
7103 : sInfo.osName.c_str(),
7104 : OGR_GetFieldTypeName(eOGRType),
7105 : OGR_GetFieldTypeName(OFTString));
7106 0 : return false;
7107 : }
7108 : }
7109 :
7110 655 : if (!bTypeOK && IsTimestamp(format))
7111 : {
7112 32 : sInfo.eNominalFieldType = OFTDateTime;
7113 32 : if (eOGRType == sInfo.eNominalFieldType)
7114 : {
7115 31 : bTypeOK = true;
7116 : }
7117 1 : else if (eOGRType == OFTString)
7118 : {
7119 1 : bFallbackTypesUsed = true;
7120 1 : bTypeOK = true;
7121 : }
7122 : else
7123 : {
7124 0 : CPLError(CE_Failure, CPLE_AppDefined,
7125 : "For field %s, OGR field type is %s whereas "
7126 : "Arrow type implies %s",
7127 : sInfo.osName.c_str(),
7128 : OGR_GetFieldTypeName(eOGRType),
7129 : OGR_GetFieldTypeName(OFTDateTime));
7130 0 : return false;
7131 : }
7132 : }
7133 :
7134 655 : if (!bTypeOK && IsFixedWidthBinary(format))
7135 : {
7136 5 : sInfo.eNominalFieldType = OFTBinary;
7137 5 : if (eOGRType == sInfo.eNominalFieldType)
7138 : {
7139 5 : bTypeOK = true;
7140 : }
7141 0 : else if (eOGRType == OFTString)
7142 : {
7143 0 : bFallbackTypesUsed = true;
7144 0 : bTypeOK = true;
7145 : }
7146 : else
7147 : {
7148 0 : CPLError(CE_Failure, CPLE_AppDefined,
7149 : "For field %s, OGR field type is %s whereas "
7150 : "Arrow type implies %s",
7151 : sInfo.osName.c_str(),
7152 : OGR_GetFieldTypeName(eOGRType),
7153 : OGR_GetFieldTypeName(OFTBinary));
7154 0 : return false;
7155 : }
7156 : }
7157 :
7158 728 : if (!bTypeOK && (IsList(format) || IsLargeList(format) ||
7159 73 : IsFixedSizeList(format)))
7160 : {
7161 190 : const char *childFormat = schema->children[0]->format;
7162 1565 : for (const auto &sType : gasListTypes)
7163 : {
7164 1544 : if (childFormat[0] == sType.arrowLetter &&
7165 169 : childFormat[1] == 0)
7166 : {
7167 169 : sInfo.eNominalFieldType = sType.eType;
7168 169 : if (eOGRType == sInfo.eNominalFieldType)
7169 : {
7170 154 : bTypeOK = true;
7171 154 : break;
7172 : }
7173 15 : else if (eOGRType == OFTString)
7174 : {
7175 15 : bFallbackTypesUsed = true;
7176 15 : bTypeOK = true;
7177 15 : break;
7178 : }
7179 : else
7180 : {
7181 0 : CPLError(CE_Failure, CPLE_AppDefined,
7182 : "For field %s, OGR field type is %s "
7183 : "whereas "
7184 : "Arrow type implies %s",
7185 : sInfo.osName.c_str(),
7186 : OGR_GetFieldTypeName(eOGRType),
7187 0 : OGR_GetFieldTypeName(sType.eType));
7188 0 : return false;
7189 : }
7190 : }
7191 : }
7192 :
7193 190 : if (!bTypeOK && IsStringView(childFormat))
7194 : {
7195 0 : sInfo.eNominalFieldType = OFTStringList;
7196 0 : if (eOGRType == sInfo.eNominalFieldType)
7197 : {
7198 0 : bTypeOK = true;
7199 : }
7200 0 : else if (eOGRType == OFTString)
7201 : {
7202 0 : bFallbackTypesUsed = true;
7203 0 : bTypeOK = true;
7204 : }
7205 : else
7206 : {
7207 0 : CPLError(CE_Failure, CPLE_AppDefined,
7208 : "For field %s, OGR field type is %s "
7209 : "whereas "
7210 : "Arrow type implies %s",
7211 : sInfo.osName.c_str(),
7212 : OGR_GetFieldTypeName(eOGRType),
7213 : OGR_GetFieldTypeName(OFTStringList));
7214 0 : return false;
7215 : }
7216 : }
7217 :
7218 190 : if (!bTypeOK && IsDecimal(childFormat))
7219 : {
7220 11 : if (!ParseDecimalFormat(childFormat, sInfo.nPrecision,
7221 : sInfo.nScale, sInfo.nWidthInBytes))
7222 : {
7223 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
7224 0 : (std::string("Invalid field format ") +
7225 0 : childFormat + " for field " + osFieldPrefix +
7226 : fieldName)
7227 : .c_str());
7228 0 : return false;
7229 : }
7230 :
7231 11 : const char *pszError = GetErrorIfUnsupportedDecimal(
7232 : sInfo.nWidthInBytes, sInfo.nPrecision);
7233 11 : if (pszError)
7234 : {
7235 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
7236 0 : return false;
7237 : }
7238 :
7239 11 : sInfo.eNominalFieldType = OFTRealList;
7240 11 : if (eOGRType == sInfo.eNominalFieldType)
7241 : {
7242 11 : bTypeOK = true;
7243 : }
7244 0 : else if (eOGRType == OFTString)
7245 : {
7246 0 : bFallbackTypesUsed = true;
7247 0 : bTypeOK = true;
7248 : }
7249 : else
7250 : {
7251 0 : CPLError(CE_Failure, CPLE_AppDefined,
7252 : "For field %s, OGR field type is %s whereas "
7253 : "Arrow type implies %s",
7254 : sInfo.osName.c_str(),
7255 : OGR_GetFieldTypeName(eOGRType),
7256 : OGR_GetFieldTypeName(OFTRealList));
7257 0 : return false;
7258 : }
7259 : }
7260 :
7261 190 : if (!bTypeOK && IsSupportForJSONObj(schema->children[0]))
7262 : {
7263 10 : sInfo.eNominalFieldType = OFTString;
7264 10 : if (eOGRType == sInfo.eNominalFieldType)
7265 : {
7266 10 : bTypeOK = true;
7267 : }
7268 : else
7269 : {
7270 0 : CPLError(CE_Failure, CPLE_AppDefined,
7271 : "For field %s, OGR field type is %s whereas "
7272 : "Arrow type implies %s",
7273 : sInfo.osName.c_str(),
7274 : OGR_GetFieldTypeName(eOGRType),
7275 : OGR_GetFieldTypeName(OFTString));
7276 0 : return false;
7277 : }
7278 : }
7279 :
7280 190 : if (!bTypeOK)
7281 : {
7282 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
7283 0 : ("List of type '" + std::string(childFormat) +
7284 0 : "' for field " + osFieldPrefix + fieldName +
7285 : " is not supported.")
7286 : .c_str());
7287 0 : return false;
7288 : }
7289 : }
7290 :
7291 655 : if (!bTypeOK && IsDecimal(format))
7292 : {
7293 10 : if (!ParseDecimalFormat(format, sInfo.nPrecision, sInfo.nScale,
7294 : sInfo.nWidthInBytes))
7295 : {
7296 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
7297 0 : (std::string("Invalid field format ") + format +
7298 0 : " for field " + osFieldPrefix + fieldName)
7299 : .c_str());
7300 0 : return false;
7301 : }
7302 :
7303 10 : const char *pszError = GetErrorIfUnsupportedDecimal(
7304 : sInfo.nWidthInBytes, sInfo.nPrecision);
7305 10 : if (pszError)
7306 : {
7307 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
7308 0 : return false;
7309 : }
7310 :
7311 10 : sInfo.eNominalFieldType = OFTReal;
7312 10 : if (eOGRType == sInfo.eNominalFieldType)
7313 : {
7314 10 : bTypeOK = true;
7315 : }
7316 0 : else if (eOGRType == OFTString)
7317 : {
7318 0 : bFallbackTypesUsed = true;
7319 0 : bTypeOK = true;
7320 : }
7321 : else
7322 : {
7323 0 : CPLError(CE_Failure, CPLE_AppDefined,
7324 : "For field %s, OGR field type is %s whereas "
7325 : "Arrow type implies %s",
7326 : sInfo.osName.c_str(),
7327 : OGR_GetFieldTypeName(eOGRType),
7328 : OGR_GetFieldTypeName(OFTReal));
7329 0 : return false;
7330 : }
7331 : }
7332 :
7333 655 : if (!bTypeOK)
7334 : {
7335 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
7336 0 : ("Type '" + std::string(format) + "' for field " +
7337 0 : osFieldPrefix + fieldName + " is not supported.")
7338 : .c_str());
7339 0 : return false;
7340 : }
7341 : }
7342 : else
7343 : {
7344 80 : sInfo.iOGRFieldIdx = poFeatureDefn->GetGeomFieldIndex(
7345 80 : osExpectedOGRFieldName.c_str());
7346 80 : if (sInfo.iOGRFieldIdx < 0)
7347 : {
7348 52 : if (pszGeomFieldName && pszGeomFieldName == sInfo.osName)
7349 : {
7350 47 : if (poFeatureDefn->GetGeomFieldCount() == 0)
7351 : {
7352 0 : CPLError(CE_Failure, CPLE_AppDefined,
7353 : "Cannot find OGR geometry field for Arrow "
7354 : "array %s",
7355 : sInfo.osName.c_str());
7356 0 : return false;
7357 : }
7358 47 : sInfo.iOGRFieldIdx = 0;
7359 : }
7360 : else
7361 : {
7362 : // Check if ARROW:extension:name = ogc.wkb or geoarrow.wkb
7363 5 : const char *pabyMetadata = schema->metadata;
7364 5 : if (pabyMetadata)
7365 : {
7366 : const auto oMetadata =
7367 5 : OGRParseArrowMetadata(pabyMetadata);
7368 5 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
7369 10 : if (oIter != oMetadata.end() &&
7370 5 : (oIter->second == EXTENSION_NAME_OGC_WKB ||
7371 0 : oIter->second == EXTENSION_NAME_GEOARROW_WKB))
7372 : {
7373 5 : if (poFeatureDefn->GetGeomFieldCount() == 0)
7374 : {
7375 0 : CPLError(CE_Failure, CPLE_AppDefined,
7376 : "Cannot find OGR geometry field "
7377 : "for Arrow array %s",
7378 : sInfo.osName.c_str());
7379 0 : return false;
7380 : }
7381 5 : sInfo.iOGRFieldIdx = 0;
7382 : }
7383 : }
7384 : }
7385 :
7386 52 : if (sInfo.iOGRFieldIdx < 0)
7387 : {
7388 0 : CPLError(CE_Failure, CPLE_AppDefined,
7389 : "Cannot find OGR field for Arrow array %s",
7390 : sInfo.osName.c_str());
7391 0 : return false;
7392 : }
7393 : }
7394 :
7395 80 : if (!IsBinary(format) && !IsLargeBinary(format))
7396 : {
7397 0 : CPLError(CE_Failure, CPLE_AppDefined,
7398 : "Geometry column '%s' should be of Arrow format "
7399 : "'z' (binary) or 'Z' (large binary)",
7400 : sInfo.osName.c_str());
7401 0 : return false;
7402 : }
7403 80 : sInfo.bIsGeomCol = true;
7404 : }
7405 : }
7406 :
7407 769 : asFieldInfo.emplace_back(std::move(sInfo));
7408 769 : return true;
7409 : }
7410 :
7411 : /************************************************************************/
7412 : /* GetUInt64Value() */
7413 : /************************************************************************/
7414 :
7415 90 : static inline uint64_t GetUInt64Value(const struct ArrowSchema *schema,
7416 : const struct ArrowArray *array,
7417 : size_t iFeature)
7418 : {
7419 90 : uint64_t nVal = 0;
7420 90 : CPLAssert(schema->format[1] == 0);
7421 90 : switch (schema->format[0])
7422 : {
7423 8 : case ARROW_LETTER_INT8:
7424 8 : nVal = GetValue<int8_t>(array, iFeature);
7425 8 : break;
7426 8 : case ARROW_LETTER_UINT8:
7427 8 : nVal = GetValue<uint8_t>(array, iFeature);
7428 8 : break;
7429 8 : case ARROW_LETTER_INT16:
7430 8 : nVal = GetValue<int16_t>(array, iFeature);
7431 8 : break;
7432 8 : case ARROW_LETTER_UINT16:
7433 8 : nVal = GetValue<uint16_t>(array, iFeature);
7434 8 : break;
7435 34 : case ARROW_LETTER_INT32:
7436 34 : nVal = GetValue<int32_t>(array, iFeature);
7437 34 : break;
7438 8 : case ARROW_LETTER_UINT32:
7439 8 : nVal = GetValue<uint32_t>(array, iFeature);
7440 8 : break;
7441 8 : case ARROW_LETTER_INT64:
7442 8 : nVal = GetValue<int64_t>(array, iFeature);
7443 8 : break;
7444 8 : case ARROW_LETTER_UINT64:
7445 8 : nVal = GetValue<uint64_t>(array, iFeature);
7446 8 : break;
7447 0 : default:
7448 : // Shouldn't happen given checks in BuildOGRFieldInfo()
7449 0 : CPLAssert(false);
7450 : break;
7451 : }
7452 90 : return nVal;
7453 : }
7454 :
7455 : /************************************************************************/
7456 : /* GetWorkingBufferSize() */
7457 : /************************************************************************/
7458 :
7459 1382830 : static size_t GetWorkingBufferSize(const struct ArrowSchema *schema,
7460 : const struct ArrowArray *array,
7461 : size_t iFeature, int &iArrowIdxInOut,
7462 : const std::vector<FieldInfo> &asFieldInfo)
7463 : {
7464 1382830 : const char *fieldName = schema->name;
7465 1382830 : const char *format = schema->format;
7466 1382830 : const int iArrowIdx = iArrowIdxInOut;
7467 1382830 : if (IsStructure(format))
7468 : {
7469 60426 : if (asFieldInfo[iArrowIdx].eNominalFieldType == OFTDateTime)
7470 : {
7471 0 : ++iArrowIdxInOut;
7472 0 : return 0;
7473 : }
7474 :
7475 60426 : size_t nRet = 0;
7476 1382850 : for (int64_t i = 0; i < array->n_children; ++i)
7477 : {
7478 1322420 : nRet += GetWorkingBufferSize(
7479 1322420 : schema->children[i], array->children[i],
7480 1322420 : iFeature + static_cast<size_t>(array->offset), iArrowIdxInOut,
7481 : asFieldInfo);
7482 : }
7483 60426 : return nRet;
7484 : }
7485 1322400 : ++iArrowIdxInOut;
7486 :
7487 1322400 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7488 122188 : return 0;
7489 :
7490 1200210 : const uint8_t *pabyValidity =
7491 1200210 : static_cast<const uint8_t *>(array->buffers[0]);
7492 1200340 : if (array->null_count != 0 && pabyValidity &&
7493 123 : !TestBit(pabyValidity, static_cast<size_t>(iFeature + array->offset)))
7494 : {
7495 : // empty string
7496 57 : return 0;
7497 : }
7498 :
7499 1200160 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7500 : {
7501 41 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7502 41 : const auto dictArray = array->dictionary;
7503 41 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7504 : {
7505 1 : CPLError(CE_Failure, CPLE_AppDefined,
7506 : "Feature %" PRIu64
7507 : ", field %s: invalid dictionary index: %" PRIu64,
7508 : static_cast<uint64_t>(iFeature), fieldName, nDictIdx);
7509 1 : return 0;
7510 : }
7511 :
7512 40 : array = dictArray;
7513 40 : schema = schema->dictionary;
7514 40 : format = schema->format;
7515 40 : iFeature = static_cast<size_t>(nDictIdx);
7516 : }
7517 :
7518 1200160 : constexpr size_t SZ_NUL_TERMINATOR = 1;
7519 1200160 : if (IsString(format))
7520 : {
7521 1200150 : const auto *panOffsets =
7522 1200150 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset;
7523 1200150 : return (panOffsets[iFeature + 1] - panOffsets[iFeature]) +
7524 1200150 : SZ_NUL_TERMINATOR;
7525 : }
7526 10 : else if (IsLargeString(format))
7527 : {
7528 10 : const auto *panOffsets =
7529 10 : static_cast<const uint64_t *>(array->buffers[1]) + array->offset;
7530 10 : return static_cast<size_t>(panOffsets[iFeature + 1] -
7531 10 : panOffsets[iFeature]) +
7532 10 : SZ_NUL_TERMINATOR;
7533 : }
7534 0 : else if (IsStringView(format))
7535 : {
7536 0 : const auto *panStringView =
7537 0 : static_cast<const uint32_t *>(array->buffers[1]) +
7538 0 : array->offset * N_VALUES_PER_STRING_VIEW;
7539 0 : return panStringView[iFeature * N_VALUES_PER_STRING_VIEW] +
7540 0 : SZ_NUL_TERMINATOR;
7541 : }
7542 0 : return 0;
7543 : }
7544 :
7545 : /************************************************************************/
7546 : /* FillField() */
7547 : /************************************************************************/
7548 :
7549 : template <typename ArrowType, typename OGRType = ArrowType>
7550 462 : inline static void FillField(const struct ArrowArray *array, int iOGRFieldIdx,
7551 : size_t iFeature, OGRFeature &oFeature)
7552 : {
7553 462 : const auto *panValues = static_cast<const ArrowType *>(array->buffers[1]);
7554 462 : oFeature.SetFieldSameTypeUnsafe(
7555 : iOGRFieldIdx,
7556 462 : static_cast<OGRType>(panValues[iFeature + array->offset]));
7557 462 : }
7558 :
7559 : /************************************************************************/
7560 : /* FillFieldString() */
7561 : /************************************************************************/
7562 :
7563 : template <typename OffsetType>
7564 : inline static void
7565 1200160 : FillFieldString(const struct ArrowArray *array, int iOGRFieldIdx,
7566 : size_t iFeature, int iArrowIdx,
7567 : const std::vector<FieldInfo> &asFieldInfo,
7568 : std::string &osWorkingBuffer, OGRFeature &oFeature)
7569 : {
7570 1200160 : const auto *panOffsets =
7571 1200160 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7572 1200160 : const char *pszStr = static_cast<const char *>(array->buffers[2]);
7573 1200160 : const size_t nLen =
7574 1200160 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7575 1200160 : if (asFieldInfo[iArrowIdx].bUseStringOptim)
7576 : {
7577 1200160 : oFeature.SetFieldSameTypeUnsafe(
7578 1200160 : iOGRFieldIdx, &osWorkingBuffer[0] + osWorkingBuffer.size());
7579 1200160 : osWorkingBuffer.append(pszStr + panOffsets[iFeature], nLen);
7580 1200160 : osWorkingBuffer.push_back(0); // append null character
7581 : }
7582 : else
7583 : {
7584 0 : oFeature.SetField(iOGRFieldIdx, std::string_view(pszStr, nLen));
7585 : }
7586 1200160 : }
7587 :
7588 : /************************************************************************/
7589 : /* FillFieldStringView() */
7590 : /************************************************************************/
7591 :
7592 : inline static void
7593 0 : FillFieldStringView(const struct ArrowArray *array, int iOGRFieldIdx,
7594 : size_t iFeature, int iArrowIdx,
7595 : const std::vector<FieldInfo> &asFieldInfo,
7596 : std::string &osWorkingBuffer, OGRFeature &oFeature)
7597 : {
7598 0 : const auto sv = GetStringView(array, iFeature);
7599 0 : if (asFieldInfo[iArrowIdx].bUseStringOptim)
7600 : {
7601 0 : oFeature.SetFieldSameTypeUnsafe(
7602 0 : iOGRFieldIdx, &osWorkingBuffer[0] + osWorkingBuffer.size());
7603 0 : osWorkingBuffer.append(sv);
7604 0 : osWorkingBuffer.push_back(0); // append null character
7605 : }
7606 : else
7607 : {
7608 0 : oFeature.SetField(iOGRFieldIdx, sv);
7609 : }
7610 0 : }
7611 :
7612 : /************************************************************************/
7613 : /* FillFieldBinary() */
7614 : /************************************************************************/
7615 :
7616 : template <typename OffsetType>
7617 : inline static bool
7618 60344 : FillFieldBinary(const struct ArrowArray *array, int iOGRFieldIdx,
7619 : size_t iFeature, int iArrowIdx,
7620 : const std::vector<FieldInfo> &asFieldInfo,
7621 : const std::string &osFieldPrefix, const char *pszFieldName,
7622 : OGRFeature &oFeature)
7623 : {
7624 60344 : const auto *panOffsets =
7625 60344 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7626 60344 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]) +
7627 60344 : static_cast<size_t>(panOffsets[iFeature]);
7628 60344 : const size_t nLen =
7629 60344 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7630 60344 : if (asFieldInfo[iArrowIdx].bIsGeomCol)
7631 : {
7632 60316 : size_t nBytesConsumedOut = 0;
7633 :
7634 : // Check if we can reuse the existing geometry, to save dynamic memory
7635 : // allocations.
7636 60316 : if (nLen >= 5 && pabyData[0] == wkbNDR && pabyData[1] <= wkbTriangle &&
7637 60309 : pabyData[2] == 0 && pabyData[3] == 0 && pabyData[4] == 0)
7638 : {
7639 60309 : const auto poExistingGeom = oFeature.GetGeomFieldRef(iOGRFieldIdx);
7640 120570 : if (poExistingGeom &&
7641 60261 : poExistingGeom->getGeometryType() == pabyData[1])
7642 : {
7643 60261 : poExistingGeom->importFromWkb(pabyData, nLen, wkbVariantIso,
7644 : nBytesConsumedOut);
7645 60261 : return true;
7646 : }
7647 : }
7648 :
7649 55 : OGRGeometry *poGeometry = nullptr;
7650 55 : OGRGeometryFactory::createFromWkb(pabyData, nullptr, &poGeometry, nLen,
7651 : wkbVariantIso, nBytesConsumedOut);
7652 55 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, poGeometry);
7653 : }
7654 : else
7655 : {
7656 28 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
7657 : {
7658 0 : CPLError(CE_Failure, CPLE_NotSupported,
7659 : "Content for field %s%s is too large",
7660 : osFieldPrefix.c_str(), pszFieldName);
7661 0 : return false;
7662 : }
7663 28 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(nLen), pabyData);
7664 : }
7665 83 : return true;
7666 : }
7667 :
7668 : /************************************************************************/
7669 : /* FillFeature() */
7670 : /************************************************************************/
7671 :
7672 1322420 : static bool FillFeature(OGRLayer *poLayer, const struct ArrowSchema *schema,
7673 : const struct ArrowArray *array,
7674 : const std::string &osFieldPrefix, size_t iFeature,
7675 : int &iArrowIdxInOut,
7676 : const std::vector<FieldInfo> &asFieldInfo,
7677 : OGRFeature &oFeature, std::string &osWorkingBuffer)
7678 :
7679 : {
7680 1322420 : const char *fieldName = schema->name;
7681 1322420 : const char *format = schema->format;
7682 1322420 : const int iArrowIdx = iArrowIdxInOut;
7683 1322420 : if (IsStructure(format))
7684 : {
7685 19 : if (asFieldInfo[iArrowIdx].eNominalFieldType == OFTDateTime)
7686 : {
7687 0 : ++iArrowIdxInOut;
7688 0 : const int iOGRFieldIdx = asFieldInfo[iArrowIdx].iOGRFieldIdx;
7689 :
7690 0 : if (array->null_count != 0)
7691 : {
7692 0 : const uint8_t *pabyValidity =
7693 0 : static_cast<const uint8_t *>(array->buffers[0]);
7694 0 : if (pabyValidity &&
7695 0 : !TestBit(pabyValidity,
7696 0 : static_cast<size_t>(iFeature + array->offset)))
7697 : {
7698 0 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7699 0 : OGR_RawField_SetNull(psField);
7700 : }
7701 : }
7702 :
7703 0 : const auto *panTimestamps =
7704 0 : static_cast<const int64_t *>(array->children[0]->buffers[1]);
7705 0 : int64_t nTimestamp = panTimestamps[iFeature + array->offset];
7706 0 : const auto *panOffsetsMinutes =
7707 0 : static_cast<const int16_t *>(array->children[1]->buffers[1]);
7708 0 : const int nOffsetMinute =
7709 0 : panOffsetsMinutes[iFeature + array->offset];
7710 0 : const int nTZFlag =
7711 0 : nOffsetMinute >= -14 * 60 && nOffsetMinute <= 14 * 60
7712 0 : ? OGR_TZFLAG_UTC + nOffsetMinute / 15
7713 : : OGR_TZFLAG_UTC;
7714 :
7715 0 : const char *formatTS = schema->children[0]->format;
7716 : const int nInvFactorToSecond =
7717 0 : IsTimestampSeconds(formatTS) ? 1
7718 0 : : IsTimestampMilliseconds(formatTS) ? 1000
7719 0 : : IsTimestampMicroseconds(formatTS) ? 1000 * 1000
7720 0 : : IsTimestampNanoseconds(formatTS) ? 1000 * 1000 * 1000
7721 0 : : 1;
7722 0 : double floatingPart = 0;
7723 0 : if (nInvFactorToSecond)
7724 : {
7725 0 : floatingPart = (nTimestamp % nInvFactorToSecond) /
7726 0 : double(nInvFactorToSecond);
7727 0 : nTimestamp /= nInvFactorToSecond;
7728 : }
7729 0 : nTimestamp += (nTZFlag - OGR_TZFLAG_UTC) * 15 * 60;
7730 : struct tm dt;
7731 0 : CPLUnixTimeToYMDHMS(nTimestamp, &dt);
7732 0 : oFeature.SetField(iOGRFieldIdx, dt.tm_year + 1900, dt.tm_mon + 1,
7733 : dt.tm_mday, dt.tm_hour, dt.tm_min,
7734 0 : static_cast<float>(dt.tm_sec + floatingPart),
7735 : nTZFlag);
7736 : }
7737 : else
7738 : {
7739 19 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
7740 78 : for (int64_t i = 0; i < array->n_children; ++i)
7741 : {
7742 59 : if (!FillFeature(poLayer, schema->children[i],
7743 59 : array->children[i], osNewPrefix,
7744 59 : iFeature + static_cast<size_t>(array->offset),
7745 : iArrowIdxInOut, asFieldInfo, oFeature,
7746 : osWorkingBuffer))
7747 0 : return false;
7748 : }
7749 : }
7750 19 : return true;
7751 : }
7752 1322400 : ++iArrowIdxInOut;
7753 1322400 : const int iOGRFieldIdx = asFieldInfo[iArrowIdx].iOGRFieldIdx;
7754 :
7755 1322400 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7756 : {
7757 62 : format = schema->dictionary->format;
7758 : }
7759 :
7760 1322400 : if (array->null_count != 0)
7761 : {
7762 1011 : const uint8_t *pabyValidity =
7763 1011 : static_cast<const uint8_t *>(array->buffers[0]);
7764 1973 : if (pabyValidity &&
7765 962 : !TestBit(pabyValidity,
7766 962 : static_cast<size_t>(iFeature + array->offset)))
7767 : {
7768 298 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7769 5 : oFeature.SetFID(OGRNullFID);
7770 293 : else if (asFieldInfo[iArrowIdx].bIsGeomCol)
7771 70 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, nullptr);
7772 223 : else if (asFieldInfo[iArrowIdx].eSetFeatureFieldType == OFTString)
7773 : {
7774 120 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7775 120 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7776 : {
7777 63 : if (IsValidField(psField))
7778 : {
7779 51 : CPLFree(psField->String);
7780 51 : OGR_RawField_SetNull(psField);
7781 : }
7782 : }
7783 : else
7784 : {
7785 57 : OGR_RawField_SetNull(psField);
7786 : }
7787 : }
7788 : else
7789 : {
7790 103 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7791 103 : switch (asFieldInfo[iArrowIdx].eSetFeatureFieldType)
7792 : {
7793 47 : case OFTRealList:
7794 : case OFTIntegerList:
7795 : case OFTInteger64List:
7796 47 : if (IsValidField(psField))
7797 47 : CPLFree(psField->IntegerList.paList);
7798 47 : break;
7799 :
7800 7 : case OFTStringList:
7801 7 : if (IsValidField(psField))
7802 7 : CSLDestroy(psField->StringList.paList);
7803 7 : break;
7804 :
7805 1 : case OFTBinary:
7806 1 : if (IsValidField(psField))
7807 1 : CPLFree(psField->Binary.paData);
7808 1 : break;
7809 :
7810 48 : default:
7811 48 : break;
7812 : }
7813 103 : OGR_RawField_SetNull(psField);
7814 : }
7815 298 : return true;
7816 : }
7817 : }
7818 :
7819 1322100 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7820 : {
7821 49 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7822 49 : auto dictArray = array->dictionary;
7823 49 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7824 : {
7825 2 : CPLError(CE_Failure, CPLE_AppDefined,
7826 : "Feature %" PRIu64
7827 : ", field %s: invalid dictionary index: %" PRIu64,
7828 : static_cast<uint64_t>(iFeature),
7829 4 : (osFieldPrefix + fieldName).c_str(), nDictIdx);
7830 2 : return false;
7831 : }
7832 47 : array = dictArray;
7833 47 : schema = schema->dictionary;
7834 47 : iFeature = static_cast<size_t>(nDictIdx);
7835 : }
7836 :
7837 1322100 : if (IsBoolean(format))
7838 : {
7839 12 : const uint8_t *pabyValues =
7840 12 : static_cast<const uint8_t *>(array->buffers[1]);
7841 12 : oFeature.SetFieldSameTypeUnsafe(
7842 : iOGRFieldIdx,
7843 12 : TestBit(pabyValues, static_cast<size_t>(iFeature + array->offset))
7844 : ? 1
7845 : : 0);
7846 12 : return true;
7847 : }
7848 1322090 : else if (IsInt8(format))
7849 : {
7850 10 : FillField<int8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7851 10 : return true;
7852 : }
7853 1322080 : else if (IsUInt8(format))
7854 : {
7855 10 : FillField<uint8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7856 10 : return true;
7857 : }
7858 1322070 : else if (IsInt16(format))
7859 : {
7860 12 : FillField<int16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7861 12 : return true;
7862 : }
7863 1322060 : else if (IsUInt16(format))
7864 : {
7865 10 : FillField<uint16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7866 10 : return true;
7867 : }
7868 1322050 : else if (IsInt32(format))
7869 : {
7870 250 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7871 : {
7872 2 : const auto *panValues =
7873 2 : static_cast<const int32_t *>(array->buffers[1]);
7874 2 : oFeature.SetFID(panValues[iFeature + array->offset]);
7875 : }
7876 : else
7877 : {
7878 248 : FillField<int32_t>(array, iOGRFieldIdx, iFeature, oFeature);
7879 : }
7880 250 : return true;
7881 : }
7882 1321800 : else if (IsUInt32(format))
7883 : {
7884 4 : FillField<uint32_t, GIntBig>(array, iOGRFieldIdx, iFeature, oFeature);
7885 4 : return true;
7886 : }
7887 1321790 : else if (IsInt64(format))
7888 : {
7889 60392 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7890 : {
7891 60304 : const auto *panValues =
7892 60304 : static_cast<const int64_t *>(array->buffers[1]);
7893 60304 : oFeature.SetFID(panValues[iFeature + array->offset]);
7894 : }
7895 : else
7896 : {
7897 88 : FillField<int64_t, GIntBig>(array, iOGRFieldIdx, iFeature,
7898 : oFeature);
7899 : }
7900 60392 : return true;
7901 : }
7902 1261400 : else if (IsUInt64(format))
7903 : {
7904 10 : FillField<uint64_t, double>(array, iOGRFieldIdx, iFeature, oFeature);
7905 10 : return true;
7906 : }
7907 1261390 : else if (IsFloat32(format))
7908 : {
7909 12 : FillField<float, double>(array, iOGRFieldIdx, iFeature, oFeature);
7910 12 : return true;
7911 : }
7912 1261380 : else if (IsFloat64(format))
7913 : {
7914 58 : FillField<double>(array, iOGRFieldIdx, iFeature, oFeature);
7915 58 : return true;
7916 : }
7917 1261320 : else if (IsString(format))
7918 : {
7919 1200150 : FillFieldString<uint32_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7920 : asFieldInfo, osWorkingBuffer, oFeature);
7921 1200150 : return true;
7922 : }
7923 61176 : else if (IsLargeString(format))
7924 : {
7925 10 : FillFieldString<uint64_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7926 : asFieldInfo, osWorkingBuffer, oFeature);
7927 10 : return true;
7928 : }
7929 61166 : else if (IsStringView(format))
7930 : {
7931 0 : FillFieldStringView(array, iOGRFieldIdx, iFeature, iArrowIdx,
7932 : asFieldInfo, osWorkingBuffer, oFeature);
7933 0 : return true;
7934 : }
7935 61166 : else if (IsBinary(format))
7936 : {
7937 60328 : return FillFieldBinary<uint32_t>(array, iOGRFieldIdx, iFeature,
7938 : iArrowIdx, asFieldInfo, osFieldPrefix,
7939 60328 : fieldName, oFeature);
7940 : }
7941 838 : else if (IsLargeBinary(format))
7942 : {
7943 16 : return FillFieldBinary<uint64_t>(array, iOGRFieldIdx, iFeature,
7944 : iArrowIdx, asFieldInfo, osFieldPrefix,
7945 16 : fieldName, oFeature);
7946 : }
7947 822 : else if (asFieldInfo[iArrowIdx].nPrecision > 0)
7948 : {
7949 : // fits on a int64
7950 46 : CPLAssert(asFieldInfo[iArrowIdx].nPrecision <= 19);
7951 : // either 128 or 256 bits
7952 46 : CPLAssert((asFieldInfo[iArrowIdx].nWidthInBytes % 8) == 0);
7953 46 : const int nWidthIn64BitWord = asFieldInfo[iArrowIdx].nWidthInBytes / 8;
7954 :
7955 46 : if (IsList(format))
7956 : {
7957 16 : const auto panOffsets =
7958 16 : static_cast<const uint32_t *>(array->buffers[1]) +
7959 16 : array->offset;
7960 16 : const auto childArray = array->children[0];
7961 16 : std::vector<double> aValues;
7962 33 : for (auto i = panOffsets[iFeature]; i < panOffsets[iFeature + 1];
7963 : ++i)
7964 : {
7965 17 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7966 17 : asFieldInfo[iArrowIdx].nScale,
7967 : i));
7968 : }
7969 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7970 16 : aValues.data());
7971 16 : return true;
7972 : }
7973 30 : else if (IsLargeList(format))
7974 : {
7975 4 : const auto panOffsets =
7976 4 : static_cast<const uint64_t *>(array->buffers[1]) +
7977 4 : array->offset;
7978 4 : const auto childArray = array->children[0];
7979 4 : std::vector<double> aValues;
7980 4 : for (auto i = static_cast<size_t>(panOffsets[iFeature]);
7981 9 : i < static_cast<size_t>(panOffsets[iFeature + 1]); ++i)
7982 : {
7983 5 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7984 5 : asFieldInfo[iArrowIdx].nScale,
7985 : i));
7986 : }
7987 4 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7988 4 : aValues.data());
7989 4 : return true;
7990 : }
7991 26 : else if (IsFixedSizeList(format))
7992 : {
7993 4 : const int nVals = GetFixedSizeList(format);
7994 4 : const auto childArray = array->children[0];
7995 4 : std::vector<double> aValues;
7996 12 : for (int i = 0; i < nVals; ++i)
7997 : {
7998 8 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7999 8 : asFieldInfo[iArrowIdx].nScale,
8000 8 : iFeature * nVals + i));
8001 : }
8002 4 : oFeature.SetField(iOGRFieldIdx, nVals, aValues.data());
8003 4 : return true;
8004 : }
8005 :
8006 22 : CPLAssert(format[0] == ARROW_LETTER_DECIMAL);
8007 :
8008 22 : oFeature.SetFieldSameTypeUnsafe(
8009 : iOGRFieldIdx,
8010 : GetValueDecimal(array, nWidthIn64BitWord,
8011 22 : asFieldInfo[iArrowIdx].nScale, iFeature));
8012 22 : return true;
8013 : }
8014 776 : else if (SetFieldForOtherFormats(
8015 : oFeature, iOGRFieldIdx,
8016 776 : static_cast<size_t>(iFeature + array->offset), schema, array))
8017 : {
8018 776 : return true;
8019 : }
8020 :
8021 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
8022 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
8023 0 : fieldName + " is not supported.")
8024 : .c_str());
8025 0 : return false;
8026 : }
8027 :
8028 : /************************************************************************/
8029 : /* OGRLayer::WriteArrowBatch() */
8030 : /************************************************************************/
8031 :
8032 : // clang-format off
8033 : /** Writes a batch of rows from an ArrowArray.
8034 : *
8035 : * This is semantically close to calling CreateFeature() with multiple features
8036 : * at once.
8037 : *
8038 : * The ArrowArray must be of type struct (format=+s), and its children generally
8039 : * map to an OGR attribute or geometry field (unless they are struct themselves).
8040 : *
8041 : * Method IsArrowSchemaSupported() can be called to determine if the schema
8042 : * will be supported by WriteArrowBatch().
8043 : *
8044 : * OGR fields for the corresponding children arrays must exist and be of a
8045 : * compatible type. For attribute fields, they should generally be created with
8046 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
8047 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
8048 : * they should be created either implicitly at CreateLayer() time
8049 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
8050 : *
8051 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
8052 : * implementation of WriteArrowBatch() for scenarios that involve appending to
8053 : * an already existing output layer when the input Arrow field type and the
8054 : * OGR layer field type are 32/64-bit integers or real numbers, but do not match
8055 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
8056 : * can be used to control the behavior in case of lossy conversion.
8057 : *
8058 : * Arrays for geometry columns should be of binary or large binary type and
8059 : * contain WKB geometry.
8060 : *
8061 : * Note that the passed array may be set to a released state
8062 : * (array->release==NULL) after this call (not by the base implementation,
8063 : * but in specialized ones such as Parquet or Arrow for example)
8064 : *
8065 : * Supported options of the base implementation are:
8066 : * <ul>
8067 : * <li>FID=name. Name of the FID column in the array. If not provided,
8068 : * GetFIDColumn() is used to determine it. The special name
8069 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
8070 : * GetFIDColumn() are set.
8071 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
8072 : * On input, values of the FID column are used to create the feature.
8073 : * On output, the values of the FID column may be set with the FID of the
8074 : * created feature (if the array is not released).
8075 : * </li>
8076 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
8077 : * input FID is not preserved in the output layer. The default is NOTHING.
8078 : * Setting it to ERROR will cause the function to error out. Setting it
8079 : * to WARNING will cause the function to emit a warning but continue its
8080 : * processing.
8081 : * </li>
8082 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
8083 : * Action to perform when the input field value is not preserved in the
8084 : * output layer.
8085 : * The default is WARNING, which will cause the function to emit a warning
8086 : * but continue its processing.
8087 : * Setting it to ERROR will cause the function to error out if a lossy
8088 : * conversion is detected.
8089 : * </li>
8090 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
8091 : * GetGeometryColumn() is used. The special name
8092 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
8093 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
8094 : * Geometry columns are also identified if they have
8095 : * ARROW:extension:name=ogc.wkb as a field metadata.
8096 : * The corresponding ArrowArray must be of type binary (w) or large
8097 : * binary (W).
8098 : * </li>
8099 : * </ul>
8100 : *
8101 : * The following example demonstrates how to copy a layer from one format to
8102 : * another one (assuming it has at most a single geometry column):
8103 : \code{.py}
8104 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
8105 : stream = src_lyr.GetArrowStream()
8106 : schema = stream.GetSchema()
8107 :
8108 : # If the source layer has a FID column and the output driver supports
8109 : # a FID layer creation option, set it to the source FID column name.
8110 : if src_lyr.GetFIDColumn():
8111 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
8112 : "DS_LAYER_CREATIONOPTIONLIST"
8113 : )
8114 : if creationOptions and '"FID"' in creationOptions:
8115 : lcos["FID"] = src_lyr.GetFIDColumn()
8116 :
8117 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
8118 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
8119 : out_lyr = out_ds.CreateLayer(
8120 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
8121 : )
8122 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
8123 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
8124 : else:
8125 : out_lyr = out_ds.CreateLayer(
8126 : src_lyr.GetName(),
8127 : geom_type=src_lyr.GetGeomType(),
8128 : srs=src_lyr.GetSpatialRef(),
8129 : options=lcos,
8130 : )
8131 :
8132 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
8133 : assert success, error_msg
8134 :
8135 : src_geom_field_names = [
8136 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
8137 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
8138 : ]
8139 : for i in range(schema.GetChildrenCount()):
8140 : # GetArrowStream() may return "OGC_FID" for an unnamed source FID
8141 : # column and "wkb_geometry" for an unnamed source geometry column.
8142 : # Also test GetFIDColumn() and src_geom_field_names if they are
8143 : # named.
8144 : if (
8145 : schema.GetChild(i).GetName()
8146 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
8147 : and schema.GetChild(i).GetName() not in src_geom_field_names
8148 : ):
8149 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
8150 :
8151 : write_options = []
8152 : if src_lyr.GetFIDColumn():
8153 : write_options.append("FID=" + src_lyr.GetFIDColumn())
8154 : if (
8155 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
8156 : and src_lyr.GetGeometryColumn()
8157 : ):
8158 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
8159 :
8160 : while True:
8161 : array = stream.GetNextRecordBatch()
8162 : if array is None:
8163 : break
8164 : out_lyr.WriteArrowBatch(schema, array, write_options)
8165 : \endcode
8166 : *
8167 : * This method and CreateFeature() are mutually exclusive in the same session.
8168 : *
8169 : * This method is the same as the C function OGR_L_WriteArrowBatch().
8170 : *
8171 : * @param schema Schema of array
8172 : * @param array Array of type struct. It may be released (array->release==NULL)
8173 : * after calling this method.
8174 : * @param papszOptions Options. Null terminated list, or nullptr.
8175 : * @return true in case of success
8176 : * @since 3.8
8177 : */
8178 : // clang-format on
8179 :
8180 88 : bool OGRLayer::WriteArrowBatch(const struct ArrowSchema *schema,
8181 : struct ArrowArray *array,
8182 : CSLConstList papszOptions)
8183 : {
8184 88 : const char *format = schema->format;
8185 88 : if (!IsStructure(format))
8186 : {
8187 0 : CPLError(CE_Failure, CPLE_AppDefined,
8188 : "WriteArrowBatch() should be called on a schema that is a "
8189 : "struct of fields");
8190 0 : return false;
8191 : }
8192 :
8193 88 : if (schema->n_children != array->n_children)
8194 : {
8195 0 : CPLError(CE_Failure, CPLE_AppDefined,
8196 : "WriteArrowBatch(): schema->n_children (%d) != "
8197 : "array->n_children (%d)",
8198 0 : int(schema->n_children), int(array->n_children));
8199 0 : return false;
8200 : }
8201 :
8202 176 : CPLStringList aosNativeTypes;
8203 88 : auto poDS = const_cast<OGRLayer *>(this)->GetDataset();
8204 88 : if (poDS)
8205 : {
8206 88 : auto poDriver = poDS->GetDriver();
8207 88 : if (poDriver)
8208 : {
8209 : const char *pszMetadataItem =
8210 88 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
8211 88 : if (pszMetadataItem)
8212 88 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
8213 : }
8214 : }
8215 :
8216 176 : std::vector<FieldInfo> asFieldInfo;
8217 88 : auto poLayerDefn = GetLayerDefn();
8218 : const char *pszFIDName =
8219 88 : CSLFetchNameValueDef(papszOptions, "FID", GetFIDColumn());
8220 88 : if (!pszFIDName || pszFIDName[0] == 0)
8221 60 : pszFIDName = DEFAULT_ARROW_FID_NAME;
8222 : const bool bErrorIfFIDNotPreserved =
8223 88 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
8224 : "ERROR");
8225 : const bool bWarningIfFIDNotPreserved =
8226 88 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
8227 : "WARNING");
8228 : const bool bErrorIfFieldNotPreserved =
8229 88 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FIELD_NOT_PRESERVED", ""),
8230 : "ERROR");
8231 88 : const char *pszGeomFieldName = CSLFetchNameValueDef(
8232 88 : papszOptions, "GEOMETRY_NAME", GetGeometryColumn());
8233 88 : if (!pszGeomFieldName || pszGeomFieldName[0] == 0)
8234 59 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
8235 88 : const struct ArrowSchema *schemaFIDColumn = nullptr;
8236 88 : struct ArrowArray *arrayFIDColumn = nullptr;
8237 88 : bool bFallbackTypesUsed = false;
8238 836 : for (int64_t i = 0; i < schema->n_children; ++i)
8239 : {
8240 749 : if (!BuildOGRFieldInfo(schema->children[i], array->children[i],
8241 749 : poLayerDefn, std::string(), aosNativeTypes,
8242 : bFallbackTypesUsed, asFieldInfo, pszFIDName,
8243 : pszGeomFieldName, this,
8244 749 : m_poPrivate->m_oMapArrowFieldNameToOGRFieldName,
8245 : schemaFIDColumn, arrayFIDColumn))
8246 : {
8247 1 : return false;
8248 : }
8249 : }
8250 :
8251 174 : std::map<int, int> oMapOGRFieldIndexToFieldInfoIndex;
8252 174 : std::vector<bool> abUseStringOptim(poLayerDefn->GetFieldCount(), false);
8253 855 : for (int i = 0; i < static_cast<int>(asFieldInfo.size()); ++i)
8254 : {
8255 768 : if (asFieldInfo[i].iOGRFieldIdx >= 0 && !asFieldInfo[i].bIsGeomCol)
8256 : {
8257 654 : CPLAssert(oMapOGRFieldIndexToFieldInfoIndex.find(
8258 : asFieldInfo[i].iOGRFieldIdx) ==
8259 : oMapOGRFieldIndexToFieldInfoIndex.end());
8260 654 : oMapOGRFieldIndexToFieldInfoIndex[asFieldInfo[i].iOGRFieldIdx] = i;
8261 1308 : abUseStringOptim[asFieldInfo[i].iOGRFieldIdx] =
8262 1308 : asFieldInfo[i].bUseStringOptim;
8263 : }
8264 : }
8265 :
8266 : struct FeatureDefnReleaser
8267 : {
8268 87 : void operator()(OGRFeatureDefn *poFDefn)
8269 : {
8270 87 : if (poFDefn)
8271 87 : poFDefn->Release();
8272 87 : }
8273 : };
8274 :
8275 : std::unique_ptr<OGRFeatureDefn, FeatureDefnReleaser> poLayerDefnTmp(
8276 174 : std::make_unique<OGRFeatureDefn>(poLayerDefn->GetName()).release());
8277 87 : poLayerDefnTmp->Reference();
8278 :
8279 174 : std::vector<int> anIdentityFieldMap;
8280 87 : if (bFallbackTypesUsed)
8281 : {
8282 30 : poLayerDefnTmp->SetGeomType(wkbNone);
8283 101 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
8284 : {
8285 71 : anIdentityFieldMap.push_back(i);
8286 71 : const auto poSrcFieldDefn = poLayerDefn->GetFieldDefn(i);
8287 71 : const auto oIter = oMapOGRFieldIndexToFieldInfoIndex.find(i);
8288 : OGRFieldDefn oFieldDefn(
8289 : poSrcFieldDefn->GetNameRef(),
8290 71 : oIter == oMapOGRFieldIndexToFieldInfoIndex.end()
8291 1 : ? poSrcFieldDefn->GetType()
8292 143 : : asFieldInfo[oIter->second].eNominalFieldType);
8293 71 : if (oIter != oMapOGRFieldIndexToFieldInfoIndex.end())
8294 70 : asFieldInfo[oIter->second].eSetFeatureFieldType =
8295 70 : asFieldInfo[oIter->second].eNominalFieldType;
8296 71 : poLayerDefnTmp->AddFieldDefn(&oFieldDefn);
8297 : }
8298 59 : for (int i = 0; i < poLayerDefn->GetGeomFieldCount(); ++i)
8299 : {
8300 29 : poLayerDefnTmp->AddGeomFieldDefn(poLayerDefn->GetGeomFieldDefn(i));
8301 : }
8302 : }
8303 : else
8304 : {
8305 723 : for (auto &sFieldInfo : asFieldInfo)
8306 666 : sFieldInfo.eSetFeatureFieldType = sFieldInfo.eTargetFieldType;
8307 : }
8308 :
8309 : struct FeatureCleaner
8310 : {
8311 : OGRFeature &m_oFeature;
8312 : const std::vector<bool> &m_abUseStringOptim;
8313 :
8314 87 : explicit FeatureCleaner(OGRFeature &oFeature,
8315 : const std::vector<bool> &abUseStringOptim)
8316 87 : : m_oFeature(oFeature), m_abUseStringOptim(abUseStringOptim)
8317 : {
8318 87 : }
8319 :
8320 : // As we set a value that can't be CPLFree()'d in the .String member
8321 : // of string fields, we must take care of manually unsetting it before
8322 : // the destructor of OGRFeature gets called.
8323 87 : ~FeatureCleaner()
8324 87 : {
8325 87 : const auto poLayerDefn = m_oFeature.GetDefnRef();
8326 87 : const int nFieldCount = poLayerDefn->GetFieldCount();
8327 745 : for (int i = 0; i < nFieldCount; ++i)
8328 : {
8329 658 : if (m_abUseStringOptim[i])
8330 : {
8331 130 : if (m_oFeature.IsFieldSetAndNotNullUnsafe(i))
8332 102 : m_oFeature.SetFieldSameTypeUnsafe(
8333 : i, static_cast<char *>(nullptr));
8334 : }
8335 : }
8336 87 : }
8337 : };
8338 :
8339 30 : OGRFeature oFeature(bFallbackTypesUsed ? poLayerDefnTmp.get()
8340 204 : : poLayerDefn);
8341 174 : FeatureCleaner oCleaner(oFeature, abUseStringOptim);
8342 174 : OGRFeature oFeatureTarget(poLayerDefn);
8343 87 : OGRFeature *const poFeatureTarget =
8344 87 : bFallbackTypesUsed ? &oFeatureTarget : &oFeature;
8345 :
8346 : // We accumulate the content of all strings in osWorkingBuffer to avoid
8347 : // a few dynamic memory allocations
8348 174 : std::string osWorkingBuffer;
8349 :
8350 : bool bTransactionOK;
8351 : {
8352 87 : CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler);
8353 87 : bTransactionOK = StartTransaction() == OGRERR_NONE;
8354 : }
8355 :
8356 174 : const std::string emptyString;
8357 87 : int64_t fidNullCount = 0;
8358 60484 : for (size_t iFeature = 0; iFeature < static_cast<size_t>(array->length);
8359 : ++iFeature)
8360 : {
8361 60407 : oFeature.SetFID(OGRNullFID);
8362 :
8363 60407 : int iArrowIdx = 0;
8364 60407 : const size_t nWorkingBufferSize = GetWorkingBufferSize(
8365 : schema, array, iFeature, iArrowIdx, asFieldInfo);
8366 60407 : osWorkingBuffer.clear();
8367 60407 : osWorkingBuffer.reserve(nWorkingBufferSize);
8368 : #ifdef DEBUG
8369 60407 : const char *pszWorkingBuffer = osWorkingBuffer.c_str();
8370 60407 : CPL_IGNORE_RET_VAL(pszWorkingBuffer);
8371 : #endif
8372 60407 : iArrowIdx = 0;
8373 1382770 : for (int64_t i = 0; i < schema->n_children; ++i)
8374 : {
8375 1322360 : if (!FillFeature(this, schema->children[i], array->children[i],
8376 : emptyString, iFeature, iArrowIdx, asFieldInfo,
8377 : oFeature, osWorkingBuffer))
8378 : {
8379 2 : if (bTransactionOK)
8380 2 : RollbackTransaction();
8381 10 : return false;
8382 : }
8383 : }
8384 : #ifdef DEBUG
8385 : // Check that the buffer didn't get reallocated
8386 60405 : CPLAssert(pszWorkingBuffer == osWorkingBuffer.c_str());
8387 60405 : CPLAssert(osWorkingBuffer.size() == nWorkingBufferSize);
8388 : #endif
8389 :
8390 60405 : if (bFallbackTypesUsed)
8391 : {
8392 48 : oFeatureTarget.SetFrom(&oFeature, anIdentityFieldMap.data(),
8393 : /*bForgiving=*/true,
8394 : /*bUseISO8601ForDateTimeAsString=*/true);
8395 48 : oFeatureTarget.SetFID(oFeature.GetFID());
8396 :
8397 48 : if (bErrorIfFieldNotPreserved)
8398 : {
8399 26 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
8400 : {
8401 16 : if (!oFeature.IsFieldSetAndNotNullUnsafe(i))
8402 : {
8403 4 : continue;
8404 : }
8405 12 : bool bLossyConversion = false;
8406 : const auto eSrcType =
8407 12 : poLayerDefnTmp->GetFieldDefnUnsafe(i)->GetType();
8408 : const auto eDstType =
8409 12 : poLayerDefn->GetFieldDefnUnsafe(i)->GetType();
8410 :
8411 : const auto IsDoubleCastToInt64EqualTInt64 =
8412 2 : [](double dfVal, int64_t nOtherVal)
8413 : {
8414 : // Values in the range [INT64_MAX - 1023, INT64_MAX - 1]
8415 : // get converted to a double that once cast to int64_t
8416 : // is INT64_MAX + 1, hence the strict < comparison
8417 : return dfVal >=
8418 2 : static_cast<double>(
8419 2 : std::numeric_limits<int64_t>::min()) &&
8420 : dfVal <
8421 2 : static_cast<double>(
8422 4 : std::numeric_limits<int64_t>::max()) &&
8423 3 : static_cast<int64_t>(dfVal) == nOtherVal;
8424 : };
8425 :
8426 14 : if (eSrcType == OFTInteger64 && eDstType == OFTInteger &&
8427 2 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
8428 2 : oFeature.GetFieldAsInteger64Unsafe(i))
8429 : {
8430 1 : bLossyConversion = true;
8431 : }
8432 14 : else if (eSrcType == OFTReal && eDstType == OFTInteger &&
8433 3 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
8434 3 : oFeature.GetFieldAsDoubleUnsafe(i))
8435 : {
8436 2 : bLossyConversion = true;
8437 : }
8438 12 : else if (eSrcType == OFTReal && eDstType == OFTInteger64 &&
8439 3 : static_cast<double>(
8440 3 : oFeatureTarget.GetFieldAsInteger64Unsafe(i)) !=
8441 3 : oFeature.GetFieldAsDoubleUnsafe(i))
8442 : {
8443 2 : bLossyConversion = true;
8444 : }
8445 9 : else if (eSrcType == OFTInteger64 && eDstType == OFTReal &&
8446 2 : !IsDoubleCastToInt64EqualTInt64(
8447 : oFeatureTarget.GetFieldAsDoubleUnsafe(i),
8448 2 : oFeature.GetFieldAsInteger64Unsafe(i)))
8449 : {
8450 1 : bLossyConversion = true;
8451 : }
8452 12 : if (bLossyConversion)
8453 : {
8454 6 : CPLError(CE_Failure, CPLE_AppDefined,
8455 : "For feature " CPL_FRMT_GIB
8456 : ", value of field %s cannot not preserved",
8457 : oFeatureTarget.GetFID(),
8458 6 : poLayerDefnTmp->GetFieldDefn(i)->GetNameRef());
8459 6 : if (bTransactionOK)
8460 6 : RollbackTransaction();
8461 6 : return false;
8462 : }
8463 : }
8464 : }
8465 : }
8466 :
8467 60399 : const auto nInputFID = poFeatureTarget->GetFID();
8468 60399 : if (CreateFeature(poFeatureTarget) != OGRERR_NONE)
8469 : {
8470 1 : if (bTransactionOK)
8471 1 : RollbackTransaction();
8472 1 : return false;
8473 : }
8474 60398 : if (nInputFID != OGRNullFID)
8475 : {
8476 120587 : if (bWarningIfFIDNotPreserved &&
8477 : // cppcheck-suppress knownConditionTrueFalse
8478 60282 : poFeatureTarget->GetFID() != nInputFID)
8479 : {
8480 2 : CPLError(CE_Warning, CPLE_AppDefined,
8481 : "Feature id " CPL_FRMT_GIB " not preserved",
8482 : nInputFID);
8483 : }
8484 60304 : else if (bErrorIfFIDNotPreserved &&
8485 : // cppcheck-suppress knownConditionTrueFalse
8486 1 : poFeatureTarget->GetFID() != nInputFID)
8487 : {
8488 1 : CPLError(CE_Failure, CPLE_AppDefined,
8489 : "Feature id " CPL_FRMT_GIB " not preserved",
8490 : nInputFID);
8491 1 : if (bTransactionOK)
8492 1 : RollbackTransaction();
8493 1 : return false;
8494 : }
8495 : }
8496 :
8497 60397 : if (arrayFIDColumn)
8498 : {
8499 60309 : uint8_t *pabyValidity = static_cast<uint8_t *>(
8500 60309 : const_cast<void *>(arrayFIDColumn->buffers[0]));
8501 60309 : if (IsInt32(schemaFIDColumn->format))
8502 : {
8503 6 : auto *panValues = static_cast<int32_t *>(
8504 6 : const_cast<void *>(arrayFIDColumn->buffers[1]));
8505 6 : if (poFeatureTarget->GetFID() >
8506 6 : std::numeric_limits<int32_t>::max())
8507 : {
8508 0 : if (pabyValidity)
8509 : {
8510 0 : ++fidNullCount;
8511 0 : UnsetBit(pabyValidity,
8512 0 : static_cast<size_t>(iFeature +
8513 0 : arrayFIDColumn->offset));
8514 : }
8515 0 : CPLError(CE_Warning, CPLE_AppDefined,
8516 : "FID " CPL_FRMT_GIB
8517 : " cannot be stored in FID array of type int32",
8518 : poFeatureTarget->GetFID());
8519 : }
8520 : else
8521 : {
8522 6 : if (pabyValidity)
8523 : {
8524 5 : SetBit(pabyValidity,
8525 5 : static_cast<size_t>(iFeature +
8526 5 : arrayFIDColumn->offset));
8527 : }
8528 6 : panValues[iFeature + arrayFIDColumn->offset] =
8529 6 : static_cast<int32_t>(poFeatureTarget->GetFID());
8530 : }
8531 : }
8532 60303 : else if (IsInt64(schemaFIDColumn->format))
8533 : {
8534 60303 : if (pabyValidity)
8535 : {
8536 0 : SetBit(
8537 : pabyValidity,
8538 0 : static_cast<size_t>(iFeature + arrayFIDColumn->offset));
8539 : }
8540 60303 : auto *panValues = static_cast<int64_t *>(
8541 60303 : const_cast<void *>(arrayFIDColumn->buffers[1]));
8542 60303 : panValues[iFeature + arrayFIDColumn->offset] =
8543 60303 : poFeatureTarget->GetFID();
8544 : }
8545 : else
8546 : {
8547 0 : CPLAssert(false);
8548 : }
8549 : }
8550 : }
8551 77 : if (arrayFIDColumn && arrayFIDColumn->buffers[0])
8552 : {
8553 1 : arrayFIDColumn->null_count = fidNullCount;
8554 : }
8555 :
8556 77 : bool bRet = true;
8557 77 : if (bTransactionOK)
8558 66 : bRet = CommitTransaction() == OGRERR_NONE;
8559 :
8560 77 : return bRet;
8561 : }
8562 :
8563 : /************************************************************************/
8564 : /* OGR_L_WriteArrowBatch() */
8565 : /************************************************************************/
8566 :
8567 : // clang-format off
8568 : /** Writes a batch of rows from an ArrowArray.
8569 : *
8570 : * This is semantically close to calling CreateFeature() with multiple features
8571 : * at once.
8572 : *
8573 : * The ArrowArray must be of type struct (format=+s), and its children generally
8574 : * map to an OGR attribute or geometry field (unless they are struct themselves).
8575 : *
8576 : * Method IsArrowSchemaSupported() can be called to determine if the schema
8577 : * will be supported by WriteArrowBatch().
8578 : *
8579 : * OGR fields for the corresponding children arrays must exist and be of a
8580 : * compatible type. For attribute fields, they should generally be created with
8581 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
8582 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
8583 : * they should be created either implicitly at CreateLayer() time
8584 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
8585 : *
8586 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
8587 : * implementation of WriteArrowBatch() for scenarios that involve appending to
8588 : * an already existing output layer when the input Arrow field type and the
8589 : * OGR layer field type are 32/64-bit integers or real numbers, but do not match
8590 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
8591 : * can be used to control the behavior in case of lossy conversion.
8592 : *
8593 : * Arrays for geometry columns should be of binary or large binary type and
8594 : * contain WKB geometry.
8595 : *
8596 : * Note that the passed array may be set to a released state
8597 : * (array->release==NULL) after this call (not by the base implementation,
8598 : * but in specialized ones such as Parquet or Arrow for example)
8599 : *
8600 : * Supported options of the base implementation are:
8601 : * <ul>
8602 : * <li>FID=name. Name of the FID column in the array. If not provided,
8603 : * GetFIDColumn() is used to determine it. The special name
8604 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
8605 : * GetFIDColumn() are set.
8606 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
8607 : * On input, values of the FID column are used to create the feature.
8608 : * On output, the values of the FID column may be set with the FID of the
8609 : * created feature (if the array is not released).
8610 : * </li>
8611 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
8612 : * input FID is not preserved in the output layer. The default is NOTHING.
8613 : * Setting it to ERROR will cause the function to error out. Setting it
8614 : * to WARNING will cause the function to emit a warning but continue its
8615 : * processing.
8616 : * </li>
8617 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
8618 : * Action to perform when the input field value is not preserved in the
8619 : * output layer.
8620 : * The default is WARNING, which will cause the function to emit a warning
8621 : * but continue its processing.
8622 : * Setting it to ERROR will cause the function to error out if a lossy
8623 : * conversion is detected.
8624 : * </li>
8625 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
8626 : * GetGeometryColumn() is used. The special name
8627 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
8628 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
8629 : * Geometry columns are also identified if they have
8630 : * ARROW:extension:name=ogc.wkb as a field metadata.
8631 : * The corresponding ArrowArray must be of type binary (w) or large
8632 : * binary (W).
8633 : * </li>
8634 : * </ul>
8635 : *
8636 : * The following example demonstrates how to copy a layer from one format to
8637 : * another one (assuming it has at most a single geometry column):
8638 : \code{.py}
8639 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
8640 : stream = src_lyr.GetArrowStream()
8641 : schema = stream.GetSchema()
8642 :
8643 : # If the source layer has a FID column and the output driver supports
8644 : # a FID layer creation option, set it to the source FID column name.
8645 : if src_lyr.GetFIDColumn():
8646 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
8647 : "DS_LAYER_CREATIONOPTIONLIST"
8648 : )
8649 : if creationOptions and '"FID"' in creationOptions:
8650 : lcos["FID"] = src_lyr.GetFIDColumn()
8651 :
8652 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
8653 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
8654 : out_lyr = out_ds.CreateLayer(
8655 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
8656 : )
8657 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
8658 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
8659 : else:
8660 : out_lyr = out_ds.CreateLayer(
8661 : src_lyr.GetName(),
8662 : geom_type=src_lyr.GetGeomType(),
8663 : srs=src_lyr.GetSpatialRef(),
8664 : options=lcos,
8665 : )
8666 :
8667 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
8668 : assert success, error_msg
8669 :
8670 : src_geom_field_names = [
8671 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
8672 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
8673 : ]
8674 : for i in range(schema.GetChildrenCount()):
8675 : # GetArrowStream() may return "OGC_FID" for an unnamed source FID
8676 : # column and "wkb_geometry" for an unnamed source geometry column.
8677 : # Also test GetFIDColumn() and src_geom_field_names if they are
8678 : # named.
8679 : if (
8680 : schema.GetChild(i).GetName()
8681 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
8682 : and schema.GetChild(i).GetName() not in src_geom_field_names
8683 : ):
8684 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
8685 :
8686 : write_options = []
8687 : if src_lyr.GetFIDColumn():
8688 : write_options.append("FID=" + src_lyr.GetFIDColumn())
8689 : if (
8690 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
8691 : and src_lyr.GetGeometryColumn()
8692 : ):
8693 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
8694 :
8695 : while True:
8696 : array = stream.GetNextRecordBatch()
8697 : if array is None:
8698 : break
8699 : out_lyr.WriteArrowBatch(schema, array, write_options)
8700 : \endcode
8701 : *
8702 : * This method and CreateFeature() are mutually exclusive in the same session.
8703 : *
8704 : * This method is the same as the C++ method OGRLayer::WriteArrowBatch().
8705 : *
8706 : * @param hLayer Layer.
8707 : * @param schema Schema of array.
8708 : * @param array Array of type struct. It may be released (array->release==NULL)
8709 : * after calling this method.
8710 : * @param papszOptions Options. Null terminated list, or nullptr.
8711 : * @return true in case of success
8712 : * @since 3.8
8713 : */
8714 : // clang-format on
8715 :
8716 59 : bool OGR_L_WriteArrowBatch(OGRLayerH hLayer, const struct ArrowSchema *schema,
8717 : struct ArrowArray *array, CSLConstList papszOptions)
8718 : {
8719 59 : VALIDATE_POINTER1(hLayer, __func__, false);
8720 59 : VALIDATE_POINTER1(schema, __func__, false);
8721 59 : VALIDATE_POINTER1(array, __func__, false);
8722 :
8723 118 : return OGRLayer::FromHandle(hLayer)->WriteArrowBatch(schema, array,
8724 59 : papszOptions);
8725 : }
|