Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: OpenGIS Simple Features Reference Implementation
4 : * Purpose: Parts of OGRLayer dealing with Arrow C interface
5 : * Author: Even Rouault, <even dot rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022-2023, Even Rouault <even dot rouault at spatialys.com>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #include "ogrsf_frmts.h"
14 : #include "ogr_api.h"
15 : #include "ogr_recordbatch.h"
16 : #include "ograrrowarrayhelper.h"
17 : #include "ogrlayerarrow.h"
18 : #include "ogr_p.h"
19 : #include "ogr_swq.h"
20 : #include "ogr_wkb.h"
21 : #include "ogr_p.h"
22 : #include "ogrlayer_private.h"
23 :
24 : #include "cpl_float.h"
25 : #include "cpl_json.h"
26 : #include "cpl_time.h"
27 :
28 : #include <algorithm>
29 : #include <cassert>
30 : #include <cinttypes>
31 : #include <limits>
32 : #include <utility>
33 : #include <set>
34 : #include <string_view>
35 : #include <type_traits>
36 :
// Keys of the GDAL-specific key/value entries written into
// ArrowSchema::metadata by this file.
static constexpr const char *MD_GDAL_OGR_TYPE = "GDAL:OGR:type";
static constexpr const char *MD_GDAL_OGR_ALTERNATIVE_NAME =
    "GDAL:OGR:alternative_name";
static constexpr const char *MD_GDAL_OGR_COMMENT = "GDAL:OGR:comment";
static constexpr const char *MD_GDAL_OGR_DEFAULT = "GDAL:OGR:default";
static constexpr const char *MD_GDAL_OGR_SUBTYPE = "GDAL:OGR:subtype";
static constexpr const char *MD_GDAL_OGR_WIDTH = "GDAL:OGR:width";
static constexpr const char *MD_GDAL_OGR_UNIQUE = "GDAL:OGR:unique";
static constexpr const char *MD_GDAL_OGR_DOMAIN_NAME = "GDAL:OGR:domain_name";

// Single-letter format codes, used by the Is*() predicates of this file.
static constexpr char ARROW_LETTER_BOOLEAN = 'b';
static constexpr char ARROW_LETTER_INT8 = 'c';
static constexpr char ARROW_LETTER_UINT8 = 'C';
static constexpr char ARROW_LETTER_INT16 = 's';
static constexpr char ARROW_LETTER_UINT16 = 'S';
static constexpr char ARROW_LETTER_INT32 = 'i';
static constexpr char ARROW_LETTER_UINT32 = 'I';
static constexpr char ARROW_LETTER_INT64 = 'l';
static constexpr char ARROW_LETTER_UINT64 = 'L';
static constexpr char ARROW_LETTER_FLOAT16 = 'e';
static constexpr char ARROW_LETTER_FLOAT32 = 'f';
static constexpr char ARROW_LETTER_FLOAT64 = 'g';
static constexpr char ARROW_LETTER_STRING = 'u';
static constexpr char ARROW_LETTER_LARGE_STRING = 'U';
static constexpr char ARROW_LETTER_BINARY = 'z';
static constexpr char ARROW_LETTER_LARGE_BINARY = 'Z';
static constexpr char ARROW_LETTER_DECIMAL = 'd';

// Second letter of the two-letter "+x" list format codes.
static constexpr char ARROW_2ND_LETTER_LIST = 'l';
static constexpr char ARROW_2ND_LETTER_LARGE_LIST = 'L';

static constexpr int N_VALUES_PER_STRING_VIEW = 4;
68 :
/** Return whether format is exactly the "+s" format string. */
static inline bool IsStructure(const char *format)
{
    if (format[0] != '+')
        return false;
    if (format[1] != 's')
        return false;
    return format[2] == '\0';
}
73 :
/** Return whether format is exactly the "+m" format string. */
static inline bool IsMap(const char *format)
{
    if (format[0] != '+')
        return false;
    if (format[1] != 'm')
        return false;
    return format[2] == '\0';
}
78 :
/** Return whether format starts with the "w:" prefix. */
static inline bool IsFixedWidthBinary(const char *format)
{
    if (format[0] != 'w')
        return false;
    return format[1] == ':';
}
83 :
/** Return the integer that follows the "w:" prefix of format.
 *
 * The prefix itself is not checked here: callers are expected to have
 * validated format with IsFixedWidthBinary() first.
 */
static inline int GetFixedWithBinary(const char *format)
{
    constexpr size_t nPrefixLen = sizeof("w:") - 1;
    const char *pszWidth = format + nPrefixLen;
    return atoi(pszWidth);
}
88 :
89 30557 : static inline bool IsList(const char *format)
90 : {
91 36678 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LIST &&
92 36678 : format[2] == 0;
93 : }
94 :
95 20324 : static inline bool IsLargeList(const char *format)
96 : {
97 20448 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LARGE_LIST &&
98 20448 : format[2] == 0;
99 : }
100 :
/** Return whether format starts with the "+w:" prefix. */
static inline bool IsFixedSizeList(const char *format)
{
    if (format[0] != '+')
        return false;
    if (format[1] != 'w')
        return false;
    return format[2] == ':';
}
105 :
/** Return the integer that follows the "+w:" prefix of format.
 *
 * The prefix itself is not checked here: callers are expected to have
 * validated format with IsFixedSizeList() first.
 */
static inline int GetFixedSizeList(const char *format)
{
    constexpr size_t nPrefixLen = sizeof("+w:") - 1;
    const char *pszSize = format + nPrefixLen;
    return atoi(pszSize);
}
110 :
111 2800 : static inline bool IsDecimal(const char *format)
112 : {
113 2800 : return format[0] == ARROW_LETTER_DECIMAL && format[1] == ':';
114 : }
115 :
116 1342140 : static inline bool IsBoolean(const char *format)
117 : {
118 1342140 : return format[0] == ARROW_LETTER_BOOLEAN && format[1] == 0;
119 : }
120 :
121 1338900 : static inline bool IsInt8(const char *format)
122 : {
123 1338900 : return format[0] == ARROW_LETTER_INT8 && format[1] == 0;
124 : }
125 :
126 1339020 : static inline bool IsUInt8(const char *format)
127 : {
128 1339020 : return format[0] == ARROW_LETTER_UINT8 && format[1] == 0;
129 : }
130 :
131 1337600 : static inline bool IsInt16(const char *format)
132 : {
133 1337600 : return format[0] == ARROW_LETTER_INT16 && format[1] == 0;
134 : }
135 :
136 1337700 : static inline bool IsUInt16(const char *format)
137 : {
138 1337700 : return format[0] == ARROW_LETTER_UINT16 && format[1] == 0;
139 : }
140 :
141 1396970 : static inline bool IsInt32(const char *format)
142 : {
143 1396970 : return format[0] == ARROW_LETTER_INT32 && format[1] == 0;
144 : }
145 :
146 1336060 : static inline bool IsUInt32(const char *format)
147 : {
148 1336060 : return format[0] == ARROW_LETTER_UINT32 && format[1] == 0;
149 : }
150 :
151 1389670 : static inline bool IsInt64(const char *format)
152 : {
153 1389670 : return format[0] == ARROW_LETTER_INT64 && format[1] == 0;
154 : }
155 :
156 1268540 : static inline bool IsUInt64(const char *format)
157 : {
158 1268540 : return format[0] == ARROW_LETTER_UINT64 && format[1] == 0;
159 : }
160 :
161 15140 : static inline bool IsFloat16(const char *format)
162 : {
163 15140 : return format[0] == ARROW_LETTER_FLOAT16 && format[1] == 0;
164 : }
165 :
166 1274950 : static inline bool IsFloat32(const char *format)
167 : {
168 1274950 : return format[0] == ARROW_LETTER_FLOAT32 && format[1] == 0;
169 : }
170 :
171 1266610 : static inline bool IsFloat64(const char *format)
172 : {
173 1266610 : return format[0] == ARROW_LETTER_FLOAT64 && format[1] == 0;
174 : }
175 :
176 2485410 : static inline bool IsString(const char *format)
177 : {
178 2485410 : return format[0] == ARROW_LETTER_STRING && format[1] == 0;
179 : }
180 :
181 61894 : static inline bool IsStringView(const char *format)
182 : {
183 61894 : return format[0] == 'v' && format[1] == ARROW_LETTER_STRING &&
184 61894 : format[2] == 0;
185 : }
186 :
187 74115 : static inline bool IsLargeString(const char *format)
188 : {
189 74115 : return format[0] == ARROW_LETTER_LARGE_STRING && format[1] == 0;
190 : }
191 :
192 79416 : static inline bool IsBinary(const char *format)
193 : {
194 79416 : return format[0] == ARROW_LETTER_BINARY && format[1] == 0;
195 : }
196 :
197 13002 : static inline bool IsLargeBinary(const char *format)
198 : {
199 13002 : return format[0] == ARROW_LETTER_LARGE_BINARY && format[1] == 0;
200 : }
201 :
/** Return whether format starts with "ts", then chType, then ':'.
 *
 * @param format Format string to test.
 * @param chType Expected third character (the callers of this file pass
 *               's', 'm', 'u' or 'n').
 */
static inline bool IsTimestampInternal(const char *format, char chType)
{
    // Test one character at a time so that nothing is read past the
    // terminating nul of a short string.
    if (format[0] != 't')
        return false;
    if (format[1] != 's')
        return false;
    if (format[2] != chType)
        return false;
    return format[3] == ':';
}
207 :
208 4399 : static inline bool IsTimestampSeconds(const char *format)
209 : {
210 4399 : return IsTimestampInternal(format, 's');
211 : }
212 :
213 4389 : static inline bool IsTimestampMilliseconds(const char *format)
214 : {
215 4389 : return IsTimestampInternal(format, 'm');
216 : }
217 :
218 3244 : static inline bool IsTimestampMicroseconds(const char *format)
219 : {
220 3244 : return IsTimestampInternal(format, 'u');
221 : }
222 :
223 2692 : static inline bool IsTimestampNanoseconds(const char *format)
224 : {
225 2692 : return IsTimestampInternal(format, 'n');
226 : }
227 :
228 3641 : static inline bool IsTimestamp(const char *format)
229 : {
230 9831 : return IsTimestampSeconds(format) || IsTimestampMilliseconds(format) ||
231 9831 : IsTimestampMicroseconds(format) || IsTimestampNanoseconds(format);
232 : }
233 :
234 107 : static inline const char *GetTimestampTimezone(const char *format)
235 : {
236 107 : return IsTimestamp(format) ? format + strlen("tm?:") : "";
237 : }
238 :
239 : /************************************************************************/
240 : /* TestBit() */
241 : /************************************************************************/
242 :
/** Return whether bit nIdx of the bitmap pabyData is set.
 *
 * Bits are numbered from the least significant bit of each byte:
 * bit nIdx lives in byte nIdx / 8 at position nIdx % 8.
 */
inline bool TestBit(const uint8_t *pabyData, size_t nIdx)
{
    const size_t nByte = nIdx >> 3;
    const unsigned nShift = static_cast<unsigned>(nIdx & 7);
    return ((pabyData[nByte] >> nShift) & 1U) != 0;
}
247 :
248 : /************************************************************************/
249 : /* SetBit() */
250 : /************************************************************************/
251 :
/** Set bit nIdx of the bitmap pabyData to 1.
 *
 * Bits are numbered from the least significant bit of each byte:
 * bit nIdx lives in byte nIdx / 8 at position nIdx % 8.
 */
inline void SetBit(uint8_t *pabyData, size_t nIdx)
{
    uint8_t &byTarget = pabyData[nIdx >> 3];
    const unsigned nMask = 1U << (nIdx & 7);
    byTarget = static_cast<uint8_t>(byTarget | nMask);
}
256 :
257 : /************************************************************************/
258 : /* UnsetBit() */
259 : /************************************************************************/
260 :
/** Clear bit nIdx of the bitmap pabyData (set it to 0).
 *
 * Bits are numbered from the least significant bit of each byte:
 * bit nIdx lives in byte nIdx / 8 at position nIdx % 8.
 */
inline void UnsetBit(uint8_t *pabyData, size_t nIdx)
{
    uint8_t &byTarget = pabyData[nIdx >> 3];
    const uint8_t byKeepMask = static_cast<uint8_t>(~(1U << (nIdx & 7)));
    byTarget = static_cast<uint8_t>(byTarget & byKeepMask);
}
265 :
266 : /************************************************************************/
267 : /* DefaultReleaseSchema() */
268 : /************************************************************************/
269 :
270 25520 : static void OGRLayerReleaseSchema(struct ArrowSchema *schema,
271 : bool bFullFreeFormat)
272 : {
273 25520 : CPLAssert(schema->release != nullptr);
274 25520 : if (bFullFreeFormat || STARTS_WITH(schema->format, "w:") ||
275 25488 : STARTS_WITH(schema->format, "tsm:"))
276 : {
277 1039 : CPLFree(const_cast<char *>(schema->format));
278 : }
279 25520 : CPLFree(const_cast<char *>(schema->name));
280 25520 : CPLFree(const_cast<char *>(schema->metadata));
281 25520 : if (schema->children)
282 : {
283 26174 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
284 : {
285 22899 : if (schema->children[i] && schema->children[i]->release)
286 : {
287 22899 : schema->children[i]->release(schema->children[i]);
288 22899 : CPLFree(schema->children[i]);
289 : }
290 : }
291 3275 : CPLFree(schema->children);
292 : }
293 25520 : if (schema->dictionary)
294 : {
295 32 : if (schema->dictionary->release)
296 : {
297 32 : schema->dictionary->release(schema->dictionary);
298 32 : CPLFree(schema->dictionary);
299 : }
300 : }
301 25520 : schema->release = nullptr;
302 25520 : }
303 :
304 25497 : static void OGRLayerPartialReleaseSchema(struct ArrowSchema *schema)
305 : {
306 25497 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ false);
307 25497 : }
308 :
309 23 : static void OGRLayerFullReleaseSchema(struct ArrowSchema *schema)
310 : {
311 23 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ true);
312 23 : }
313 :
314 : /** Release a ArrowSchema.
315 : *
316 : * To be used by driver implementations that have a custom GetArrowStream()
317 : * implementation.
318 : *
319 : * @param schema Schema to release.
320 : * @since GDAL 3.6
321 : */
322 :
323 25465 : void OGRLayer::ReleaseSchema(struct ArrowSchema *schema)
324 : {
325 25465 : OGRLayerPartialReleaseSchema(schema);
326 25465 : }
327 :
328 : /************************************************************************/
329 : /* AddDictToSchema() */
330 : /************************************************************************/
331 :
332 32 : static void AddDictToSchema(struct ArrowSchema *psChild,
333 : const OGRCodedFieldDomain *poCodedDomain)
334 : {
335 32 : const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
336 32 : int nLastCode = -1;
337 32 : int nCountNull = 0;
338 32 : uint32_t nCountChars = 0;
339 112 : for (; psIter->pszCode; ++psIter)
340 : {
341 80 : if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
342 : {
343 0 : return;
344 : }
345 80 : int nCode = atoi(psIter->pszCode);
346 80 : if (nCode <= nLastCode || nCode - nLastCode > 100)
347 : {
348 0 : return;
349 : }
350 106 : for (int i = nLastCode + 1; i < nCode; ++i)
351 : {
352 26 : nCountNull++;
353 : }
354 80 : if (psIter->pszValue != nullptr)
355 : {
356 54 : const size_t nLen = strlen(psIter->pszValue);
357 54 : if (nLen > std::numeric_limits<uint32_t>::max() - nCountChars)
358 0 : return;
359 54 : nCountChars += static_cast<uint32_t>(nLen);
360 : }
361 : else
362 26 : nCountNull++;
363 80 : nLastCode = nCode;
364 : }
365 :
366 : auto psChildDict = static_cast<struct ArrowSchema *>(
367 32 : CPLCalloc(1, sizeof(struct ArrowSchema)));
368 32 : psChild->dictionary = psChildDict;
369 32 : psChildDict->release = OGRLayerPartialReleaseSchema;
370 32 : psChildDict->name = CPLStrdup(poCodedDomain->GetName().c_str());
371 32 : psChildDict->format = "u";
372 32 : if (nCountNull)
373 26 : psChildDict->flags = ARROW_FLAG_NULLABLE;
374 : }
375 :
376 : /************************************************************************/
377 : /* DefaultGetArrowSchema() */
378 : /************************************************************************/
379 :
380 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
381 : *
382 : * To be used by driver implementations that have a custom GetArrowStream()
383 : * implementation.
384 : *
385 : * @since GDAL 3.6
386 : */
387 2255 : int OGRLayer::GetArrowSchema(struct ArrowArrayStream *,
388 : struct ArrowSchema *out_schema)
389 : {
390 2255 : const bool bIncludeFID = CPLTestBool(
391 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
392 2255 : const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
393 : GAS_OPT_DATETIME_AS_STRING, false);
394 2255 : memset(out_schema, 0, sizeof(*out_schema));
395 2255 : out_schema->format = "+s";
396 2255 : out_schema->name = CPLStrdup("");
397 2255 : out_schema->metadata = nullptr;
398 2255 : auto poLayerDefn = GetLayerDefn();
399 2255 : const int nFieldCount = poLayerDefn->GetFieldCount();
400 2255 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
401 2255 : const int nChildren = 1 + nFieldCount + nGeomFieldCount;
402 :
403 2255 : out_schema->children = static_cast<struct ArrowSchema **>(
404 2255 : CPLCalloc(nChildren, sizeof(struct ArrowSchema *)));
405 2255 : int iSchemaChild = 0;
406 2255 : if (bIncludeFID)
407 : {
408 3982 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
409 1991 : CPLCalloc(1, sizeof(struct ArrowSchema)));
410 1991 : auto psChild = out_schema->children[iSchemaChild];
411 1991 : ++iSchemaChild;
412 1991 : psChild->release = OGRLayer::ReleaseSchema;
413 1991 : const char *pszFIDName = GetFIDColumn();
414 1991 : psChild->name =
415 1991 : CPLStrdup((pszFIDName && pszFIDName[0]) ? pszFIDName
416 : : DEFAULT_ARROW_FID_NAME);
417 1991 : psChild->format = "l";
418 : }
419 20219 : for (int i = 0; i < nFieldCount; ++i)
420 : {
421 17964 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
422 17964 : if (poFieldDefn->IsIgnored())
423 : {
424 48 : continue;
425 : }
426 :
427 35832 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
428 17916 : CPLCalloc(1, sizeof(struct ArrowSchema)));
429 17916 : auto psChild = out_schema->children[iSchemaChild];
430 17916 : ++iSchemaChild;
431 17916 : psChild->release = OGRLayer::ReleaseSchema;
432 17916 : psChild->name = CPLStrdup(poFieldDefn->GetNameRef());
433 17916 : if (poFieldDefn->IsNullable())
434 17130 : psChild->flags = ARROW_FLAG_NULLABLE;
435 17916 : const auto eType = poFieldDefn->GetType();
436 17916 : const auto eSubType = poFieldDefn->GetSubType();
437 17916 : const char *item_format = nullptr;
438 :
439 35832 : std::vector<std::pair<std::string, std::string>> oMetadata;
440 :
441 17916 : switch (eType)
442 : {
443 5745 : case OFTInteger:
444 : {
445 5745 : if (eSubType == OFSTBoolean)
446 286 : psChild->format = "b";
447 5459 : else if (eSubType == OFSTInt16)
448 673 : psChild->format = "s";
449 : else
450 4786 : psChild->format = "i";
451 :
452 5745 : const auto &osDomainName = poFieldDefn->GetDomainName();
453 5745 : if (!osDomainName.empty())
454 : {
455 32 : auto poDS = GetDataset();
456 32 : if (poDS)
457 : {
458 : const auto poFieldDomain =
459 32 : poDS->GetFieldDomain(osDomainName);
460 64 : if (poFieldDomain &&
461 32 : poFieldDomain->GetDomainType() == OFDT_CODED)
462 : {
463 32 : const OGRCodedFieldDomain *poCodedDomain =
464 : static_cast<const OGRCodedFieldDomain *>(
465 : poFieldDomain);
466 32 : AddDictToSchema(psChild, poCodedDomain);
467 : }
468 : }
469 : }
470 :
471 5745 : break;
472 : }
473 :
474 571 : case OFTInteger64:
475 571 : psChild->format = "l";
476 571 : break;
477 :
478 2892 : case OFTReal:
479 : {
480 2892 : if (eSubType == OFSTFloat32)
481 676 : psChild->format = "f";
482 : else
483 2216 : psChild->format = "g";
484 2892 : break;
485 : }
486 :
487 5136 : case OFTString:
488 : case OFTWideString:
489 5136 : psChild->format = "u";
490 5136 : break;
491 :
492 1209 : case OFTBinary:
493 : {
494 1209 : if (poFieldDefn->GetWidth() > 0)
495 9 : psChild->format =
496 9 : CPLStrdup(CPLSPrintf("w:%d", poFieldDefn->GetWidth()));
497 : else
498 1200 : psChild->format = "z";
499 1209 : break;
500 : }
501 :
502 383 : case OFTIntegerList:
503 : {
504 383 : if (eSubType == OFSTBoolean)
505 92 : item_format = "b";
506 291 : else if (eSubType == OFSTInt16)
507 67 : item_format = "s";
508 : else
509 224 : item_format = "i";
510 383 : break;
511 : }
512 :
513 97 : case OFTInteger64List:
514 97 : item_format = "l";
515 97 : break;
516 :
517 256 : case OFTRealList:
518 : {
519 256 : if (eSubType == OFSTFloat32)
520 84 : item_format = "f";
521 : else
522 172 : item_format = "g";
523 256 : break;
524 : }
525 :
526 273 : case OFTStringList:
527 : case OFTWideStringList:
528 273 : item_format = "u";
529 273 : break;
530 :
531 210 : case OFTDate:
532 210 : psChild->format = "tdD";
533 210 : break;
534 :
535 118 : case OFTTime:
536 118 : psChild->format = "ttm";
537 118 : break;
538 :
539 1026 : case OFTDateTime:
540 : {
541 1026 : const char *pszPrefix = "tsm:";
542 : const char *pszTZOverride =
543 1026 : m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
544 1026 : const int nTZFlag = poFieldDefn->GetTZFlag();
545 1026 : if (bDateTimeAsString)
546 : {
547 19 : psChild->format = "u";
548 : }
549 1007 : else if (pszTZOverride && EQUAL(pszTZOverride, "unknown"))
550 : {
551 4 : psChild->format = CPLStrdup(pszPrefix);
552 : }
553 1003 : else if ((pszTZOverride && EQUAL(pszTZOverride, "mixed")) ||
554 963 : (!pszTZOverride && nTZFlag == OGR_TZFLAG_MIXED_TZ))
555 : {
556 : oMetadata.emplace_back(
557 6 : std::pair(ARROW_EXTENSION_NAME_KEY,
558 6 : EXTENSION_NAME_ARROW_TIMESTAMP_WITH_OFFSET));
559 :
560 6 : psChild->format = "+s";
561 6 : psChild->n_children = 2;
562 6 : psChild->children = static_cast<struct ArrowSchema **>(
563 6 : CPLCalloc(2, sizeof(struct ArrowSchema *)));
564 :
565 : // Create sub-child for timestamp in UTC
566 12 : psChild->children[0] = static_cast<struct ArrowSchema *>(
567 6 : CPLCalloc(1, sizeof(struct ArrowSchema)));
568 6 : psChild->children[0]->release = OGRLayer::ReleaseSchema;
569 12 : psChild->children[0]->name =
570 6 : CPLStrdup(ATSWO_TIMESTAMP_FIELD_NAME);
571 6 : psChild->children[0]->format = CPLStrdup("tsm:UTC");
572 :
573 : // Create sub-child for offset to UTC in minutes
574 12 : psChild->children[1] = static_cast<struct ArrowSchema *>(
575 6 : CPLCalloc(1, sizeof(struct ArrowSchema)));
576 6 : psChild->children[1]->release = OGRLayer::ReleaseSchema;
577 12 : psChild->children[1]->name =
578 6 : CPLStrdup(ATSWO_OFFSET_MINUTES_FIELD_NAME);
579 6 : psChild->children[1]->format = "s";
580 : }
581 997 : else if (pszTZOverride)
582 : {
583 40 : psChild->format = CPLStrdup(
584 80 : (std::string(pszPrefix) + pszTZOverride).c_str());
585 : }
586 : else
587 : {
588 957 : if (nTZFlag == OGR_TZFLAG_UTC)
589 : {
590 5 : psChild->format =
591 5 : CPLStrdup(CPLSPrintf("%sUTC", pszPrefix));
592 : }
593 952 : else if (nTZFlag == OGR_TZFLAG_UNKNOWN ||
594 : nTZFlag == OGR_TZFLAG_LOCALTIME)
595 : {
596 936 : psChild->format = CPLStrdup(pszPrefix);
597 : }
598 : else
599 : {
600 16 : psChild->format = CPLStrdup(
601 32 : (pszPrefix + OGRTZFlagToTimezone(nTZFlag, "UTC"))
602 : .c_str());
603 : }
604 : }
605 1026 : break;
606 : }
607 : }
608 :
609 17916 : if (item_format)
610 : {
611 1009 : psChild->format = "+l";
612 1009 : psChild->n_children = 1;
613 1009 : psChild->children = static_cast<struct ArrowSchema **>(
614 1009 : CPLCalloc(1, sizeof(struct ArrowSchema *)));
615 2018 : psChild->children[0] = static_cast<struct ArrowSchema *>(
616 1009 : CPLCalloc(1, sizeof(struct ArrowSchema)));
617 1009 : psChild->children[0]->release = OGRLayer::ReleaseSchema;
618 1009 : psChild->children[0]->name = CPLStrdup("item");
619 1009 : psChild->children[0]->format = item_format;
620 : }
621 :
622 17916 : if (eType == OFTDateTime && bDateTimeAsString)
623 : {
624 : oMetadata.emplace_back(
625 19 : std::pair(MD_GDAL_OGR_TYPE, OGR_GetFieldTypeName(eType)));
626 : }
627 :
628 17916 : const char *pszAlternativeName = poFieldDefn->GetAlternativeNameRef();
629 17916 : if (pszAlternativeName && pszAlternativeName[0])
630 : oMetadata.emplace_back(
631 262 : std::pair(MD_GDAL_OGR_ALTERNATIVE_NAME, pszAlternativeName));
632 :
633 17916 : const char *pszDefault = poFieldDefn->GetDefault();
634 17916 : if (pszDefault && pszDefault[0])
635 42 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DEFAULT, pszDefault));
636 :
637 17916 : const std::string &osComment = poFieldDefn->GetComment();
638 17916 : if (!osComment.empty())
639 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_COMMENT, osComment));
640 :
641 17916 : if (eType == OFTString && eSubType == OFSTJSON)
642 : {
643 130 : oMetadata.emplace_back(
644 130 : std::pair(ARROW_EXTENSION_NAME_KEY, EXTENSION_NAME_ARROW_JSON));
645 : }
646 17786 : else if (eSubType != OFSTNone && eSubType != OFSTBoolean &&
647 : eSubType != OFSTFloat32)
648 : {
649 0 : oMetadata.emplace_back(std::pair(
650 741 : MD_GDAL_OGR_SUBTYPE, OGR_GetFieldSubTypeName(eSubType)));
651 : }
652 17916 : if (eType == OFTString && poFieldDefn->GetWidth() > 0)
653 : {
654 0 : oMetadata.emplace_back(std::pair(
655 647 : MD_GDAL_OGR_WIDTH, CPLSPrintf("%d", poFieldDefn->GetWidth())));
656 : }
657 17916 : if (poFieldDefn->IsUnique())
658 : {
659 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_UNIQUE, "true"));
660 : }
661 17916 : if (!poFieldDefn->GetDomainName().empty())
662 : {
663 64 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DOMAIN_NAME,
664 64 : poFieldDefn->GetDomainName()));
665 : }
666 :
667 17916 : if (!oMetadata.empty())
668 : {
669 1869 : uint64_t nLen64 = sizeof(int32_t);
670 3768 : for (const auto &oPair : oMetadata)
671 : {
672 1899 : nLen64 += sizeof(int32_t);
673 1899 : nLen64 += oPair.first.size();
674 1899 : nLen64 += sizeof(int32_t);
675 1899 : nLen64 += oPair.second.size();
676 : }
677 1869 : if (nLen64 <
678 1869 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
679 : {
680 1869 : const size_t nLen = static_cast<size_t>(nLen64);
681 1869 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
682 1869 : psChild->metadata = pszMetadata;
683 1869 : size_t offsetMD = 0;
684 1869 : int32_t nSize = static_cast<int>(oMetadata.size());
685 1869 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
686 1869 : offsetMD += sizeof(int32_t);
687 3768 : for (const auto &oPair : oMetadata)
688 : {
689 1899 : nSize = static_cast<int32_t>(oPair.first.size());
690 1899 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
691 1899 : offsetMD += sizeof(int32_t);
692 1899 : memcpy(pszMetadata + offsetMD, oPair.first.data(),
693 : oPair.first.size());
694 1899 : offsetMD += oPair.first.size();
695 :
696 1899 : nSize = static_cast<int32_t>(oPair.second.size());
697 1899 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
698 1899 : offsetMD += sizeof(int32_t);
699 1899 : memcpy(pszMetadata + offsetMD, oPair.second.data(),
700 : oPair.second.size());
701 1899 : offsetMD += oPair.second.size();
702 : }
703 :
704 1869 : CPLAssert(offsetMD == nLen);
705 1869 : CPL_IGNORE_RET_VAL(offsetMD);
706 : }
707 : else
708 : {
709 : // Extremely unlikely !
710 0 : CPLError(CE_Warning, CPLE_AppDefined,
711 : "Cannot write ArrowSchema::metadata due to "
712 : "too large content");
713 : }
714 : }
715 : }
716 :
717 : const char *const pszGeometryMetadataEncoding =
718 2255 : m_aosArrowArrayStreamOptions.FetchNameValue(
719 : "GEOMETRY_METADATA_ENCODING");
720 2255 : const char *pszExtensionName = EXTENSION_NAME_OGC_WKB;
721 2255 : if (pszGeometryMetadataEncoding)
722 : {
723 6 : if (EQUAL(pszGeometryMetadataEncoding, "OGC"))
724 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
725 6 : else if (EQUAL(pszGeometryMetadataEncoding, "GEOARROW"))
726 6 : pszExtensionName = EXTENSION_NAME_GEOARROW_WKB;
727 : else
728 0 : CPLError(CE_Warning, CPLE_NotSupported,
729 : "Unsupported GEOMETRY_METADATA_ENCODING value: %s",
730 : pszGeometryMetadataEncoding);
731 : }
732 4223 : for (int i = 0; i < nGeomFieldCount; ++i)
733 : {
734 1968 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
735 1968 : if (poFieldDefn->IsIgnored())
736 : {
737 15 : continue;
738 : }
739 :
740 1953 : out_schema->children[iSchemaChild] = CreateSchemaForWKBGeometryColumn(
741 : poFieldDefn, "z", pszExtensionName);
742 :
743 1953 : ++iSchemaChild;
744 : }
745 :
746 2255 : out_schema->n_children = iSchemaChild;
747 2255 : out_schema->release = OGRLayer::ReleaseSchema;
748 2255 : return 0;
749 : }
750 :
751 : /************************************************************************/
752 : /* CreateSchemaForWKBGeometryColumn() */
753 : /************************************************************************/
754 :
755 : /** Return a ArrowSchema* corresponding to the WKB encoding of a geometry
756 : * column.
757 : */
758 :
759 : /* static */
760 : struct ArrowSchema *
761 2282 : OGRLayer::CreateSchemaForWKBGeometryColumn(const OGRGeomFieldDefn *poFieldDefn,
762 : const char *pszArrowFormat,
763 : const char *pszExtensionName)
764 : {
765 2282 : CPLAssert(strcmp(pszArrowFormat, "z") == 0 ||
766 : strcmp(pszArrowFormat, "Z") == 0);
767 2282 : if (!EQUAL(pszExtensionName, EXTENSION_NAME_OGC_WKB) &&
768 6 : !EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
769 : {
770 0 : CPLError(CE_Failure, CPLE_NotSupported,
771 : "Unsupported extension name '%s'. Defaulting to '%s'",
772 : pszExtensionName, EXTENSION_NAME_OGC_WKB);
773 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
774 : }
775 : auto psSchema = static_cast<struct ArrowSchema *>(
776 2282 : CPLCalloc(1, sizeof(struct ArrowSchema)));
777 2282 : psSchema->release = OGRLayer::ReleaseSchema;
778 2282 : const char *pszGeomFieldName = poFieldDefn->GetNameRef();
779 2282 : if (pszGeomFieldName[0] == '\0')
780 802 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
781 2282 : psSchema->name = CPLStrdup(pszGeomFieldName);
782 2282 : if (poFieldDefn->IsNullable())
783 2253 : psSchema->flags = ARROW_FLAG_NULLABLE;
784 2282 : psSchema->format = strcmp(pszArrowFormat, "z") == 0 ? "z" : "Z";
785 2282 : std::string osExtensionMetadata;
786 2282 : if (EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
787 : {
788 6 : const auto poSRS = poFieldDefn->GetSpatialRef();
789 6 : if (poSRS)
790 : {
791 3 : char *pszPROJJSON = nullptr;
792 3 : poSRS->exportToPROJJSON(&pszPROJJSON, nullptr);
793 3 : if (pszPROJJSON)
794 : {
795 3 : osExtensionMetadata = "{\"crs\":";
796 3 : osExtensionMetadata += pszPROJJSON;
797 3 : osExtensionMetadata += '}';
798 3 : CPLFree(pszPROJJSON);
799 : }
800 : else
801 : {
802 0 : CPLError(CE_Warning, CPLE_AppDefined,
803 : "Cannot export CRS of geometry field %s to PROJJSON",
804 : poFieldDefn->GetNameRef());
805 : }
806 : }
807 : }
808 2282 : size_t nLen = sizeof(int32_t) + sizeof(int32_t) +
809 : strlen(ARROW_EXTENSION_NAME_KEY) + sizeof(int32_t) +
810 2282 : strlen(pszExtensionName);
811 2282 : if (!osExtensionMetadata.empty())
812 : {
813 3 : nLen += sizeof(int32_t) + strlen(ARROW_EXTENSION_METADATA_KEY) +
814 3 : sizeof(int32_t) + osExtensionMetadata.size();
815 : }
816 2282 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
817 2282 : psSchema->metadata = pszMetadata;
818 2282 : size_t offsetMD = 0;
819 2282 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
820 2282 : osExtensionMetadata.empty() ? 1 : 2;
821 2282 : offsetMD += sizeof(int32_t);
822 2282 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
823 : static_cast<int32_t>(strlen(ARROW_EXTENSION_NAME_KEY));
824 2282 : offsetMD += sizeof(int32_t);
825 2282 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_NAME_KEY,
826 : strlen(ARROW_EXTENSION_NAME_KEY));
827 2282 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_NAME_KEY));
828 2282 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
829 2282 : static_cast<int32_t>(strlen(pszExtensionName));
830 2282 : offsetMD += sizeof(int32_t);
831 2282 : memcpy(pszMetadata + offsetMD, pszExtensionName, strlen(pszExtensionName));
832 2282 : offsetMD += strlen(pszExtensionName);
833 2282 : if (!osExtensionMetadata.empty())
834 : {
835 3 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
836 : static_cast<int32_t>(strlen(ARROW_EXTENSION_METADATA_KEY));
837 3 : offsetMD += sizeof(int32_t);
838 3 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_METADATA_KEY,
839 : strlen(ARROW_EXTENSION_METADATA_KEY));
840 3 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_METADATA_KEY));
841 3 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
842 3 : static_cast<int32_t>(osExtensionMetadata.size());
843 3 : offsetMD += sizeof(int32_t);
844 3 : memcpy(pszMetadata + offsetMD, osExtensionMetadata.c_str(),
845 : osExtensionMetadata.size());
846 3 : offsetMD += osExtensionMetadata.size();
847 : }
848 2282 : CPLAssert(offsetMD == nLen);
849 2282 : CPL_IGNORE_RET_VAL(offsetMD);
850 4564 : return psSchema;
851 : }
852 :
853 : /************************************************************************/
854 : /* StaticGetArrowSchema() */
855 : /************************************************************************/
856 :
857 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
858 : *
859 : * To be used by driver implementations that have a custom GetArrowStream()
860 : * implementation.
861 : *
862 : * @since GDAL 3.6
863 : */
864 2476 : int OGRLayer::StaticGetArrowSchema(struct ArrowArrayStream *stream,
865 : struct ArrowSchema *out_schema)
866 : {
867 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
868 2476 : stream->private_data)
869 2476 : ->poShared->m_poLayer;
870 2476 : if (poLayer == nullptr)
871 : {
872 1 : CPLError(CE_Failure, CPLE_NotSupported,
873 : "Calling get_schema() on a freed OGRLayer is not supported");
874 1 : return EINVAL;
875 : }
876 2475 : return poLayer->GetArrowSchema(stream, out_schema);
877 : }
878 :
879 : /************************************************************************/
880 : /* DefaultReleaseArray() */
881 : /************************************************************************/
882 :
883 35089 : static void OGRLayerDefaultReleaseArray(struct ArrowArray *array)
884 : {
885 35089 : if (array->buffers)
886 : {
887 110257 : for (int i = 0; i < static_cast<int>(array->n_buffers); ++i)
888 75168 : VSIFreeAligned(const_cast<void *>(array->buffers[i]));
889 35089 : CPLFree(array->buffers);
890 : }
891 35089 : if (array->children)
892 : {
893 41032 : for (int i = 0; i < static_cast<int>(array->n_children); ++i)
894 : {
895 33262 : if (array->children[i] && array->children[i]->release)
896 : {
897 32889 : array->children[i]->release(array->children[i]);
898 32889 : CPLFree(array->children[i]);
899 : }
900 : }
901 7770 : CPLFree(array->children);
902 : }
903 35089 : if (array->dictionary)
904 : {
905 148 : if (array->dictionary->release)
906 : {
907 148 : array->dictionary->release(array->dictionary);
908 148 : CPLFree(array->dictionary);
909 : }
910 : }
911 35089 : array->release = nullptr;
912 35089 : }
913 :
914 : /** Release a ArrowArray.
915 : *
916 : * To be used by driver implementations that have a custom GetArrowStream()
917 : * implementation.
918 : *
919 : * @param array Arrow array to release.
920 : * @since GDAL 3.6
921 : */
922 3987 : void OGRLayer::ReleaseArray(struct ArrowArray *array)
923 : {
924 3987 : OGRLayerDefaultReleaseArray(array);
925 3987 : }
926 :
927 : /************************************************************************/
928 : /* IsValidField() */
929 : /************************************************************************/
930 :
931 88666 : static inline bool IsValidField(const OGRField *psRawField)
932 : {
933 103806 : return (!(psRawField->Set.nMarker1 == OGRUnsetMarker &&
934 7570 : psRawField->Set.nMarker2 == OGRUnsetMarker &&
935 177332 : psRawField->Set.nMarker3 == OGRUnsetMarker) &&
936 81096 : !(psRawField->Set.nMarker1 == OGRNullMarker &&
937 3213 : psRawField->Set.nMarker2 == OGRNullMarker &&
938 91879 : psRawField->Set.nMarker3 == OGRNullMarker));
939 : }
940 :
941 : /************************************************************************/
942 : /* AllocValidityBitmap() */
943 : /************************************************************************/
944 :
945 3516 : static uint8_t *AllocValidityBitmap(size_t nSize)
946 : {
947 : auto pabyValidity = static_cast<uint8_t *>(
948 3516 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((1 + nSize + 7) / 8));
949 3516 : if (pabyValidity)
950 : {
951 : // All valid initially
952 3516 : memset(pabyValidity, 0xFF, (nSize + 7) / 8);
953 : }
954 3516 : return pabyValidity;
955 : }
956 :
957 : /************************************************************************/
958 : /* FillArray() */
959 : /************************************************************************/
960 :
961 : template <class T, typename TMember>
962 5831 : static bool FillArray(struct ArrowArray *psChild,
963 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
964 : const size_t nFeatureCountLimit, const bool bIsNullable,
965 : TMember member, const int i)
966 : {
967 5831 : psChild->n_buffers = 2;
968 5831 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
969 5831 : uint8_t *pabyValidity = nullptr;
970 : T *panValues = static_cast<T *>(
971 5831 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
972 5831 : if (panValues == nullptr)
973 0 : return false;
974 5831 : psChild->buffers[1] = panValues;
975 53841 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
976 : {
977 48010 : auto &poFeature = apoFeatures[iFeat];
978 48010 : const auto psRawField = poFeature->GetRawFieldRef(i);
979 48010 : if (IsValidField(psRawField))
980 : {
981 43092 : panValues[iFeat] = static_cast<T>((*psRawField).*member);
982 : }
983 4918 : else if (bIsNullable)
984 : {
985 4918 : panValues[iFeat] = 0;
986 4918 : ++psChild->null_count;
987 4918 : if (pabyValidity == nullptr)
988 : {
989 1235 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
990 1235 : psChild->buffers[0] = pabyValidity;
991 1235 : if (pabyValidity == nullptr)
992 0 : return false;
993 : }
994 4918 : UnsetBit(pabyValidity, iFeat);
995 : }
996 : else
997 : {
998 0 : panValues[iFeat] = 0;
999 : }
1000 : }
1001 5831 : return true;
1002 : }
1003 :
1004 : /************************************************************************/
1005 : /* FillBoolArray() */
1006 : /************************************************************************/
1007 :
1008 : template <typename TMember>
1009 138 : static bool FillBoolArray(struct ArrowArray *psChild,
1010 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1011 : const size_t nFeatureCountLimit,
1012 : const bool bIsNullable, TMember member, const int i)
1013 : {
1014 138 : psChild->n_buffers = 2;
1015 138 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1016 138 : uint8_t *pabyValidity = nullptr;
1017 : uint8_t *panValues = static_cast<uint8_t *>(
1018 138 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 7 + 1) / 8));
1019 138 : if (panValues == nullptr)
1020 0 : return false;
1021 138 : memset(panValues, 0, (nFeatureCountLimit + 7) / 8);
1022 138 : psChild->buffers[1] = panValues;
1023 601 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1024 : {
1025 463 : auto &poFeature = apoFeatures[iFeat];
1026 463 : const auto psRawField = poFeature->GetRawFieldRef(i);
1027 463 : if (IsValidField(psRawField))
1028 : {
1029 405 : if ((*psRawField).*member)
1030 81 : SetBit(panValues, iFeat);
1031 : }
1032 58 : else if (bIsNullable)
1033 : {
1034 58 : ++psChild->null_count;
1035 58 : if (pabyValidity == nullptr)
1036 : {
1037 46 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1038 46 : psChild->buffers[0] = pabyValidity;
1039 46 : if (pabyValidity == nullptr)
1040 0 : return false;
1041 : }
1042 58 : UnsetBit(pabyValidity, iFeat);
1043 : }
1044 : }
1045 138 : return true;
1046 : }
1047 :
1048 : /************************************************************************/
1049 : /* FillListArray() */
1050 : /************************************************************************/
1051 :
1052 : struct GetFromIntegerList
1053 : {
1054 555 : static inline int getCount(const OGRField *psRawField)
1055 : {
1056 555 : return psRawField->IntegerList.nCount;
1057 : }
1058 :
1059 276 : static inline const int *getValues(const OGRField *psRawField)
1060 : {
1061 276 : return psRawField->IntegerList.paList;
1062 : }
1063 : };
1064 :
1065 : struct GetFromInteger64List
1066 : {
1067 242 : static inline int getCount(const OGRField *psRawField)
1068 : {
1069 242 : return psRawField->Integer64List.nCount;
1070 : }
1071 :
1072 120 : static inline const GIntBig *getValues(const OGRField *psRawField)
1073 : {
1074 120 : return psRawField->Integer64List.paList;
1075 : }
1076 : };
1077 :
1078 : struct GetFromRealList
1079 : {
1080 374 : static inline int getCount(const OGRField *psRawField)
1081 : {
1082 374 : return psRawField->RealList.nCount;
1083 : }
1084 :
1085 186 : static inline const double *getValues(const OGRField *psRawField)
1086 : {
1087 186 : return psRawField->RealList.paList;
1088 : }
1089 : };
1090 :
1091 : template <class OffsetType, class T, class GetFromList>
1092 : static size_t
1093 416 : FillListArray(struct ArrowArray *psChild,
1094 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1095 : const size_t nFeatureCountLimit, const bool bIsNullable,
1096 : const int i, const size_t nMemLimit)
1097 : {
1098 416 : psChild->n_buffers = 2;
1099 416 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1100 416 : uint8_t *pabyValidity = nullptr;
1101 : OffsetType *panOffsets =
1102 416 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1103 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1104 416 : if (panOffsets == nullptr)
1105 0 : return 0;
1106 416 : psChild->buffers[1] = panOffsets;
1107 :
1108 416 : OffsetType nOffset = 0;
1109 416 : size_t nFeatCount = 0;
1110 1445 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1111 : {
1112 1035 : panOffsets[iFeat] = nOffset;
1113 1035 : auto &poFeature = apoFeatures[iFeat];
1114 1035 : const auto psRawField = poFeature->GetRawFieldRef(i);
1115 1035 : if (IsValidField(psRawField))
1116 : {
1117 529 : const unsigned nCount = GetFromList::getCount(psRawField);
1118 529 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1119 : {
1120 6 : if (nFeatCount == 0)
1121 3 : return 0;
1122 3 : break;
1123 : }
1124 523 : nOffset += static_cast<OffsetType>(nCount);
1125 : }
1126 506 : else if (bIsNullable)
1127 : {
1128 506 : ++psChild->null_count;
1129 506 : if (pabyValidity == nullptr)
1130 : {
1131 231 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1132 231 : psChild->buffers[0] = pabyValidity;
1133 231 : if (pabyValidity == nullptr)
1134 0 : return 0;
1135 : }
1136 506 : UnsetBit(pabyValidity, iFeat);
1137 : }
1138 : }
1139 413 : panOffsets[nFeatCount] = nOffset;
1140 :
1141 413 : psChild->n_children = 1;
1142 413 : psChild->children = static_cast<struct ArrowArray **>(
1143 413 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1144 826 : psChild->children[0] = static_cast<struct ArrowArray *>(
1145 413 : CPLCalloc(1, sizeof(struct ArrowArray)));
1146 413 : auto psValueChild = psChild->children[0];
1147 :
1148 413 : psValueChild->release = OGRLayerDefaultReleaseArray;
1149 413 : psValueChild->n_buffers = 2;
1150 413 : psValueChild->buffers =
1151 413 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1152 413 : psValueChild->length = nOffset;
1153 : T *panValues = static_cast<T *>(
1154 413 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (nOffset + 1)));
1155 413 : if (panValues == nullptr)
1156 0 : return 0;
1157 413 : psValueChild->buffers[1] = panValues;
1158 :
1159 413 : nOffset = 0;
1160 1442 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1161 : {
1162 1029 : auto &poFeature = apoFeatures[iFeat];
1163 1029 : const auto psRawField = poFeature->GetRawFieldRef(i);
1164 1029 : if (IsValidField(psRawField))
1165 : {
1166 523 : const int nCount = GetFromList::getCount(psRawField);
1167 523 : const auto paList = GetFromList::getValues(psRawField);
1168 : if (sizeof(*paList) == sizeof(T))
1169 456 : memcpy(panValues + nOffset, paList, nCount * sizeof(T));
1170 : else
1171 : {
1172 203 : for (int j = 0; j < nCount; ++j)
1173 : {
1174 136 : panValues[nOffset + j] = static_cast<T>(paList[j]);
1175 : }
1176 : }
1177 523 : nOffset += static_cast<OffsetType>(nCount);
1178 : }
1179 : }
1180 :
1181 413 : return nFeatCount;
1182 : }
1183 :
1184 : template <class OffsetType, class GetFromList>
1185 : static size_t
1186 49 : FillListArrayBool(struct ArrowArray *psChild,
1187 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1188 : const size_t nFeatureCountLimit, const bool bIsNullable,
1189 : const int i, const size_t nMemLimit)
1190 : {
1191 49 : psChild->n_buffers = 2;
1192 49 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1193 49 : uint8_t *pabyValidity = nullptr;
1194 : OffsetType *panOffsets =
1195 49 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1196 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1197 49 : if (panOffsets == nullptr)
1198 0 : return 0;
1199 49 : psChild->buffers[1] = panOffsets;
1200 :
1201 49 : OffsetType nOffset = 0;
1202 49 : size_t nFeatCount = 0;
1203 138 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1204 : {
1205 91 : panOffsets[iFeat] = nOffset;
1206 91 : auto &poFeature = apoFeatures[iFeat];
1207 91 : const auto psRawField = poFeature->GetRawFieldRef(i);
1208 91 : if (IsValidField(psRawField))
1209 : {
1210 60 : const unsigned nCount = GetFromList::getCount(psRawField);
1211 60 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1212 : {
1213 2 : if (nFeatCount == 0)
1214 1 : return 0;
1215 1 : break;
1216 : }
1217 58 : nOffset += static_cast<OffsetType>(nCount);
1218 : }
1219 31 : else if (bIsNullable)
1220 : {
1221 31 : ++psChild->null_count;
1222 31 : if (pabyValidity == nullptr)
1223 : {
1224 27 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1225 27 : psChild->buffers[0] = pabyValidity;
1226 27 : if (pabyValidity == nullptr)
1227 0 : return 0;
1228 : }
1229 31 : UnsetBit(pabyValidity, iFeat);
1230 : }
1231 : }
1232 48 : panOffsets[nFeatCount] = nOffset;
1233 :
1234 48 : psChild->n_children = 1;
1235 48 : psChild->children = static_cast<struct ArrowArray **>(
1236 48 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1237 96 : psChild->children[0] = static_cast<struct ArrowArray *>(
1238 48 : CPLCalloc(1, sizeof(struct ArrowArray)));
1239 48 : auto psValueChild = psChild->children[0];
1240 :
1241 48 : psValueChild->release = OGRLayerDefaultReleaseArray;
1242 48 : psValueChild->n_buffers = 2;
1243 48 : psValueChild->buffers =
1244 48 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1245 48 : psValueChild->length = nOffset;
1246 : uint8_t *panValues = static_cast<uint8_t *>(
1247 48 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nOffset + 7 + 1) / 8));
1248 48 : if (panValues == nullptr)
1249 0 : return 0;
1250 48 : memset(panValues, 0, (nOffset + 7) / 8);
1251 48 : psValueChild->buffers[1] = panValues;
1252 :
1253 48 : nOffset = 0;
1254 138 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1255 : {
1256 90 : auto &poFeature = apoFeatures[iFeat];
1257 90 : const auto psRawField = poFeature->GetRawFieldRef(i);
1258 90 : if (IsValidField(psRawField))
1259 : {
1260 59 : const int nCount = GetFromList::getCount(psRawField);
1261 59 : const auto paList = GetFromList::getValues(psRawField);
1262 :
1263 373 : for (int j = 0; j < nCount; ++j)
1264 : {
1265 314 : if (paList[j])
1266 55 : SetBit(panValues, nOffset + j);
1267 : }
1268 59 : nOffset += static_cast<OffsetType>(nCount);
1269 : }
1270 : }
1271 :
1272 48 : return nFeatCount;
1273 : }
1274 :
1275 : /************************************************************************/
1276 : /* FillStringArray() */
1277 : /************************************************************************/
1278 :
1279 : template <class T>
1280 : static size_t
1281 3777 : FillStringArray(struct ArrowArray *psChild,
1282 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1283 : const size_t nFeatureCountLimit, const bool bIsNullable,
1284 : const int i, const size_t nMemLimit)
1285 : {
1286 3777 : psChild->n_buffers = 3;
1287 3777 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1288 3777 : uint8_t *pabyValidity = nullptr;
1289 : T *panOffsets = static_cast<T *>(
1290 3777 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1291 3777 : if (panOffsets == nullptr)
1292 0 : return 0;
1293 3777 : psChild->buffers[1] = panOffsets;
1294 :
1295 3777 : size_t nOffset = 0;
1296 3777 : size_t nFeatCount = 0;
1297 34027 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1298 : {
1299 30270 : panOffsets[iFeat] = static_cast<T>(nOffset);
1300 30270 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1301 30270 : if (IsValidField(psRawField))
1302 : {
1303 26930 : const size_t nLen = strlen(psRawField->String);
1304 26930 : if (nLen > nMemLimit - nOffset)
1305 : {
1306 20 : if (nFeatCount == 0)
1307 19 : return 0;
1308 1 : break;
1309 : }
1310 26910 : nOffset += static_cast<T>(nLen);
1311 : }
1312 3340 : else if (bIsNullable)
1313 : {
1314 3340 : ++psChild->null_count;
1315 3340 : if (pabyValidity == nullptr)
1316 : {
1317 1131 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1318 1131 : psChild->buffers[0] = pabyValidity;
1319 1131 : if (pabyValidity == nullptr)
1320 0 : return 0;
1321 : }
1322 3340 : UnsetBit(pabyValidity, iFeat);
1323 : }
1324 : }
1325 3758 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1326 :
1327 : char *pachValues =
1328 3758 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1329 3758 : if (pachValues == nullptr)
1330 0 : return 0;
1331 3758 : psChild->buffers[2] = pachValues;
1332 :
1333 3758 : nOffset = 0;
1334 34008 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1335 : {
1336 30250 : const size_t nLen =
1337 30250 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1338 30250 : if (nLen)
1339 : {
1340 25358 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1341 25358 : memcpy(pachValues + nOffset, psRawField->String, nLen);
1342 25358 : nOffset += nLen;
1343 : }
1344 : }
1345 :
1346 3758 : return nFeatCount;
1347 : }
1348 :
1349 : /************************************************************************/
1350 : /* FillStringListArray() */
1351 : /************************************************************************/
1352 :
1353 : template <class OffsetType>
1354 : static size_t
1355 203 : FillStringListArray(struct ArrowArray *psChild,
1356 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1357 : const size_t nFeatureCountLimit, const bool bIsNullable,
1358 : const int i, const size_t nMemLimit)
1359 : {
1360 203 : psChild->n_buffers = 2;
1361 203 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1362 203 : uint8_t *pabyValidity = nullptr;
1363 : OffsetType *panOffsets =
1364 203 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1365 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1366 203 : if (panOffsets == nullptr)
1367 0 : return false;
1368 203 : psChild->buffers[1] = panOffsets;
1369 :
1370 203 : OffsetType nStrings = 0;
1371 203 : OffsetType nCountChars = 0;
1372 203 : size_t nFeatCount = 0;
1373 516 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1374 : {
1375 315 : panOffsets[iFeat] = nStrings;
1376 315 : auto &poFeature = apoFeatures[iFeat];
1377 315 : const auto psRawField = poFeature->GetRawFieldRef(i);
1378 315 : if (IsValidField(psRawField))
1379 : {
1380 108 : const int nCount = psRawField->StringList.nCount;
1381 108 : if (static_cast<size_t>(nCount) >
1382 108 : static_cast<size_t>(nMemLimit - nStrings))
1383 : {
1384 0 : if (nFeatCount == 0)
1385 0 : return 0;
1386 0 : goto after_loop;
1387 : }
1388 280 : for (int j = 0; j < nCount; ++j)
1389 : {
1390 174 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1391 174 : if (nLen > static_cast<size_t>(nMemLimit - nCountChars))
1392 : {
1393 2 : if (nFeatCount == 0)
1394 1 : return 0;
1395 1 : goto after_loop;
1396 : }
1397 172 : nCountChars += static_cast<OffsetType>(nLen);
1398 : }
1399 106 : nStrings += static_cast<OffsetType>(nCount);
1400 : }
1401 207 : else if (bIsNullable)
1402 : {
1403 207 : ++psChild->null_count;
1404 207 : if (pabyValidity == nullptr)
1405 : {
1406 152 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1407 152 : psChild->buffers[0] = pabyValidity;
1408 152 : if (pabyValidity == nullptr)
1409 0 : return 0;
1410 : }
1411 207 : UnsetBit(pabyValidity, iFeat);
1412 : }
1413 : }
1414 201 : after_loop:
1415 202 : panOffsets[nFeatCount] = nStrings;
1416 :
1417 202 : psChild->n_children = 1;
1418 202 : psChild->children = static_cast<struct ArrowArray **>(
1419 202 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1420 404 : psChild->children[0] = static_cast<struct ArrowArray *>(
1421 202 : CPLCalloc(1, sizeof(struct ArrowArray)));
1422 202 : auto psValueChild = psChild->children[0];
1423 :
1424 202 : psValueChild->release = OGRLayerDefaultReleaseArray;
1425 202 : psValueChild->length = nStrings;
1426 202 : psValueChild->n_buffers = 3;
1427 202 : psValueChild->buffers =
1428 202 : static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1429 :
1430 : OffsetType *panChildOffsets = static_cast<OffsetType *>(
1431 202 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(OffsetType) * (1 + nStrings)));
1432 202 : if (panChildOffsets == nullptr)
1433 0 : return 0;
1434 202 : psValueChild->buffers[1] = panChildOffsets;
1435 :
1436 : char *pachValues =
1437 202 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars + 1));
1438 202 : if (pachValues == nullptr)
1439 0 : return 0;
1440 202 : psValueChild->buffers[2] = pachValues;
1441 :
1442 202 : nStrings = 0;
1443 202 : nCountChars = 0;
1444 515 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1445 : {
1446 313 : auto &poFeature = apoFeatures[iFeat];
1447 313 : const auto psRawField = poFeature->GetRawFieldRef(i);
1448 313 : if (IsValidField(psRawField))
1449 : {
1450 106 : const int nCount = psRawField->StringList.nCount;
1451 278 : for (int j = 0; j < nCount; ++j)
1452 : {
1453 172 : panChildOffsets[nStrings] = nCountChars;
1454 172 : ++nStrings;
1455 172 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1456 172 : memcpy(pachValues + nCountChars,
1457 172 : psRawField->StringList.paList[j], nLen);
1458 172 : nCountChars += static_cast<OffsetType>(nLen);
1459 : }
1460 : }
1461 : }
1462 202 : panChildOffsets[nStrings] = nCountChars;
1463 :
1464 202 : return nFeatCount;
1465 : }
1466 :
1467 : /************************************************************************/
1468 : /* FillBinaryArray() */
1469 : /************************************************************************/
1470 :
1471 : template <class T>
1472 : static size_t
1473 905 : FillBinaryArray(struct ArrowArray *psChild,
1474 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1475 : const size_t nFeatureCountLimit, const bool bIsNullable,
1476 : const int i, const size_t nMemLimit)
1477 : {
1478 905 : psChild->n_buffers = 3;
1479 905 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1480 905 : uint8_t *pabyValidity = nullptr;
1481 : T *panOffsets = static_cast<T *>(
1482 905 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1483 905 : if (panOffsets == nullptr)
1484 0 : return 0;
1485 905 : psChild->buffers[1] = panOffsets;
1486 :
1487 905 : T nOffset = 0;
1488 905 : size_t nFeatCount = 0;
1489 4362 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1490 : {
1491 3459 : panOffsets[iFeat] = nOffset;
1492 3459 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1493 3459 : if (IsValidField(psRawField))
1494 : {
1495 3402 : const size_t nLen = psRawField->Binary.nCount;
1496 3402 : if (nLen > static_cast<size_t>(nMemLimit - nOffset))
1497 : {
1498 2 : if (iFeat == 0)
1499 1 : return 0;
1500 1 : break;
1501 : }
1502 3400 : nOffset += static_cast<T>(nLen);
1503 : }
1504 57 : else if (bIsNullable)
1505 : {
1506 57 : ++psChild->null_count;
1507 57 : if (pabyValidity == nullptr)
1508 : {
1509 49 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1510 49 : psChild->buffers[0] = pabyValidity;
1511 49 : if (pabyValidity == nullptr)
1512 0 : return 0;
1513 : }
1514 57 : UnsetBit(pabyValidity, iFeat);
1515 : }
1516 : }
1517 904 : panOffsets[nFeatCount] = nOffset;
1518 :
1519 : GByte *pabyValues =
1520 904 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1521 904 : if (pabyValues == nullptr)
1522 0 : return 0;
1523 904 : psChild->buffers[2] = pabyValues;
1524 :
1525 904 : nOffset = 0;
1526 4361 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1527 : {
1528 3457 : const size_t nLen =
1529 3457 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1530 3457 : if (nLen)
1531 : {
1532 3400 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1533 3400 : memcpy(pabyValues + nOffset, psRawField->Binary.paData, nLen);
1534 3400 : nOffset += static_cast<T>(nLen);
1535 : }
1536 : }
1537 :
1538 904 : return nFeatCount;
1539 : }
1540 :
1541 : /************************************************************************/
1542 : /* FillFixedWidthBinaryArray() */
1543 : /************************************************************************/
1544 :
1545 : static bool
1546 8 : FillFixedWidthBinaryArray(struct ArrowArray *psChild,
1547 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1548 : const size_t nFeatureCountLimit,
1549 : const bool bIsNullable, const int nWidth, const int i)
1550 : {
1551 8 : psChild->n_buffers = 2;
1552 8 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1553 8 : uint8_t *pabyValidity = nullptr;
1554 :
1555 8 : assert(nFeatureCountLimit + 1 <=
1556 : std::numeric_limits<size_t>::max() / nWidth);
1557 : GByte *pabyValues = static_cast<GByte *>(
1558 8 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 1) * nWidth));
1559 8 : if (pabyValues == nullptr)
1560 0 : return false;
1561 8 : psChild->buffers[1] = pabyValues;
1562 :
1563 29 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1564 : {
1565 21 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1566 21 : if (IsValidField(psRawField))
1567 : {
1568 20 : const auto nLen = psRawField->Binary.nCount;
1569 20 : if (nLen < nWidth)
1570 : {
1571 0 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1572 : nLen);
1573 0 : memset(pabyValues + iFeat * nWidth + nLen, 0, nWidth - nLen);
1574 : }
1575 : else
1576 : {
1577 20 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1578 : nWidth);
1579 : }
1580 : }
1581 : else
1582 : {
1583 1 : memset(pabyValues + iFeat * nWidth, 0, nWidth);
1584 1 : if (bIsNullable)
1585 : {
1586 1 : ++psChild->null_count;
1587 1 : if (pabyValidity == nullptr)
1588 : {
1589 1 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1590 1 : psChild->buffers[0] = pabyValidity;
1591 1 : if (pabyValidity == nullptr)
1592 0 : return false;
1593 : }
1594 1 : UnsetBit(pabyValidity, iFeat);
1595 : }
1596 : }
1597 : }
1598 :
1599 8 : return true;
1600 : }
1601 :
1602 : /************************************************************************/
1603 : /* FillWKBGeometryArray() */
1604 : /************************************************************************/
1605 :
1606 : template <class T>
1607 : static size_t
1608 1262 : FillWKBGeometryArray(struct ArrowArray *psChild,
1609 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1610 : const size_t nFeatureCountLimit,
1611 : const OGRGeomFieldDefn *poFieldDefn, const int i,
1612 : const size_t nMemLimit)
1613 : {
1614 1262 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
1615 1262 : psChild->n_buffers = 3;
1616 1262 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1617 1262 : uint8_t *pabyValidity = nullptr;
1618 : T *panOffsets = static_cast<T *>(
1619 1262 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1620 1262 : if (panOffsets == nullptr)
1621 0 : return 0;
1622 1262 : psChild->buffers[1] = panOffsets;
1623 1262 : const auto eGeomType = poFieldDefn->GetType();
1624 3786 : auto poEmptyGeom =
1625 : std::unique_ptr<OGRGeometry>(OGRGeometryFactory::createGeometry(
1626 1262 : (eGeomType == wkbNone || wkbFlatten(eGeomType) == wkbUnknown)
1627 : ? wkbGeometryCollection
1628 : : eGeomType));
1629 :
1630 1262 : size_t nOffset = 0;
1631 1262 : size_t nFeatCount = 0;
1632 14446 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1633 : {
1634 13185 : panOffsets[iFeat] = static_cast<T>(nOffset);
1635 13185 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1636 13185 : if (poGeom != nullptr)
1637 : {
1638 12610 : const size_t nLen = poGeom->WkbSize();
1639 12610 : if (nLen > nMemLimit - nOffset)
1640 : {
1641 1 : if (nFeatCount == 0)
1642 0 : return 0;
1643 1 : break;
1644 : }
1645 12609 : nOffset += static_cast<T>(nLen);
1646 : }
1647 575 : else if (bIsNullable)
1648 : {
1649 575 : ++psChild->null_count;
1650 575 : if (pabyValidity == nullptr)
1651 : {
1652 277 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1653 277 : psChild->buffers[0] = pabyValidity;
1654 277 : if (pabyValidity == nullptr)
1655 0 : return 0;
1656 : }
1657 575 : UnsetBit(pabyValidity, iFeat);
1658 : }
1659 0 : else if (poEmptyGeom)
1660 : {
1661 0 : const size_t nLen = poEmptyGeom->WkbSize();
1662 0 : if (nLen > nMemLimit - nOffset)
1663 : {
1664 0 : if (nFeatCount == 0)
1665 0 : return 0;
1666 0 : break;
1667 : }
1668 0 : nOffset += static_cast<T>(nLen);
1669 : }
1670 : }
1671 1262 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1672 :
1673 : GByte *pabyValues =
1674 1262 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1675 1262 : if (pabyValues == nullptr)
1676 0 : return 0;
1677 1262 : psChild->buffers[2] = pabyValues;
1678 :
1679 1262 : nOffset = 0;
1680 14446 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1681 : {
1682 13184 : const size_t nLen =
1683 13184 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1684 13184 : if (nLen)
1685 : {
1686 12609 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1687 12609 : poGeom->exportToWkb(wkbNDR, pabyValues + nOffset, wkbVariantIso);
1688 12609 : nOffset += nLen;
1689 : }
1690 575 : else if (!bIsNullable && poEmptyGeom)
1691 : {
1692 0 : poEmptyGeom->exportToWkb(wkbNDR, pabyValues + nOffset,
1693 : wkbVariantIso);
1694 0 : nOffset += nLen;
1695 : }
1696 : }
1697 :
1698 1262 : return nFeatCount;
1699 : }
1700 :
1701 : /************************************************************************/
1702 : /* FillDateArray() */
1703 : /************************************************************************/
1704 :
1705 125 : static bool FillDateArray(struct ArrowArray *psChild,
1706 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1707 : const size_t nFeatureCountLimit,
1708 : const bool bIsNullable, const int i)
1709 : {
1710 125 : psChild->n_buffers = 2;
1711 125 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1712 125 : uint8_t *pabyValidity = nullptr;
1713 125 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1714 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1715 125 : if (panValues == nullptr)
1716 0 : return false;
1717 125 : psChild->buffers[1] = panValues;
1718 475 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1719 : {
1720 350 : auto &poFeature = apoFeatures[iFeat];
1721 350 : const auto psRawField = poFeature->GetRawFieldRef(i);
1722 350 : if (IsValidField(psRawField))
1723 : {
1724 : struct tm brokenDown;
1725 262 : memset(&brokenDown, 0, sizeof(brokenDown));
1726 262 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1727 262 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1728 262 : brokenDown.tm_mday = psRawField->Date.Day;
1729 262 : panValues[iFeat] =
1730 262 : static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
1731 : }
1732 88 : else if (bIsNullable)
1733 : {
1734 88 : panValues[iFeat] = 0;
1735 88 : ++psChild->null_count;
1736 88 : if (pabyValidity == nullptr)
1737 : {
1738 61 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1739 61 : psChild->buffers[0] = pabyValidity;
1740 61 : if (pabyValidity == nullptr)
1741 0 : return false;
1742 : }
1743 88 : UnsetBit(pabyValidity, iFeat);
1744 : }
1745 : else
1746 : {
1747 0 : panValues[iFeat] = 0;
1748 : }
1749 : }
1750 125 : return true;
1751 : }
1752 :
1753 : /************************************************************************/
1754 : /* FillTimeArray() */
1755 : /************************************************************************/
1756 :
1757 72 : static bool FillTimeArray(struct ArrowArray *psChild,
1758 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1759 : const size_t nFeatureCountLimit,
1760 : const bool bIsNullable, const int i)
1761 : {
1762 72 : psChild->n_buffers = 2;
1763 72 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1764 72 : uint8_t *pabyValidity = nullptr;
1765 72 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1766 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1767 72 : if (panValues == nullptr)
1768 0 : return false;
1769 72 : psChild->buffers[1] = panValues;
1770 667 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1771 : {
1772 595 : auto &poFeature = apoFeatures[iFeat];
1773 595 : const auto psRawField = poFeature->GetRawFieldRef(i);
1774 595 : if (IsValidField(psRawField))
1775 : {
1776 548 : panValues[iFeat] =
1777 548 : psRawField->Date.Hour * 3600000 +
1778 548 : psRawField->Date.Minute * 60000 +
1779 548 : static_cast<int>(psRawField->Date.Second * 1000 + 0.5f);
1780 : }
1781 47 : else if (bIsNullable)
1782 : {
1783 47 : panValues[iFeat] = 0;
1784 47 : ++psChild->null_count;
1785 47 : if (pabyValidity == nullptr)
1786 : {
1787 39 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1788 39 : psChild->buffers[0] = pabyValidity;
1789 39 : if (pabyValidity == nullptr)
1790 0 : return false;
1791 : }
1792 47 : UnsetBit(pabyValidity, iFeat);
1793 : }
1794 : else
1795 : {
1796 0 : panValues[iFeat] = 0;
1797 : }
1798 : }
1799 72 : return true;
1800 : }
1801 :
1802 : /************************************************************************/
1803 : /* FillDateTimeArray() */
1804 : /************************************************************************/
1805 :
1806 : static bool
1807 712 : FillDateTimeArray(struct ArrowArray *psChild,
1808 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1809 : const size_t nFeatureCountLimit, const bool bIsNullable,
1810 : const int i, int nFieldTZFlag)
1811 : {
1812 712 : psChild->n_buffers = 2;
1813 712 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1814 712 : uint8_t *pabyValidity = nullptr;
1815 712 : int64_t *panValues = static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1816 : sizeof(int64_t) * (nFeatureCountLimit + 1)));
1817 712 : if (panValues == nullptr)
1818 0 : return false;
1819 712 : psChild->buffers[1] = panValues;
1820 : struct tm brokenDown;
1821 712 : memset(&brokenDown, 0, sizeof(brokenDown));
1822 3141 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1823 : {
1824 2429 : auto &poFeature = apoFeatures[iFeat];
1825 2429 : const auto psRawField = poFeature->GetRawFieldRef(i);
1826 2429 : if (IsValidField(psRawField))
1827 : {
1828 1670 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1829 1670 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1830 1670 : brokenDown.tm_mday = psRawField->Date.Day;
1831 1670 : brokenDown.tm_hour = psRawField->Date.Hour;
1832 1670 : brokenDown.tm_min = psRawField->Date.Minute;
1833 1670 : brokenDown.tm_sec = static_cast<int>(psRawField->Date.Second);
1834 : auto nVal =
1835 1670 : CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
1836 1670 : (static_cast<int>(psRawField->Date.Second * 1000 + 0.5f) %
1837 1670 : 1000);
1838 1670 : if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
1839 65 : psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1840 : {
1841 : // Convert for psRawField->Date.TZFlag to UTC
1842 65 : const int TZOffset =
1843 65 : (psRawField->Date.TZFlag - OGR_TZFLAG_UTC) * 15;
1844 65 : const int TZOffsetMS = TZOffset * 60 * 1000;
1845 65 : nVal -= TZOffsetMS;
1846 : }
1847 1670 : panValues[iFeat] = nVal;
1848 : }
1849 759 : else if (bIsNullable)
1850 : {
1851 759 : panValues[iFeat] = 0;
1852 759 : ++psChild->null_count;
1853 759 : if (pabyValidity == nullptr)
1854 : {
1855 261 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1856 261 : psChild->buffers[0] = pabyValidity;
1857 261 : if (pabyValidity == nullptr)
1858 0 : return false;
1859 : }
1860 759 : UnsetBit(pabyValidity, iFeat);
1861 : }
1862 : else
1863 : {
1864 0 : panValues[iFeat] = 0;
1865 : }
1866 : }
1867 712 : return true;
1868 : }
1869 :
1870 : /************************************************************************/
1871 : /* FillDateTimeArrayWithTimeZone() */
1872 : /************************************************************************/
1873 :
1874 3 : static bool FillDateTimeArrayWithTimeZone(
1875 : struct ArrowArray *psChild,
1876 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1877 : const size_t nFeatureCountLimit, const bool bIsNullable, const int i,
1878 : int nFieldTZFlag)
1879 : {
1880 3 : psChild->n_children = 2;
1881 3 : psChild->children = static_cast<struct ArrowArray **>(
1882 3 : CPLCalloc(2, sizeof(struct ArrowArray *)));
1883 3 : psChild->n_buffers = 1;
1884 3 : psChild->buffers = static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
1885 3 : uint8_t *pabyValidity = nullptr;
1886 :
1887 : // Create sub-array for timestamp in UTC
1888 6 : psChild->children[0] = static_cast<struct ArrowArray *>(
1889 3 : CPLCalloc(1, sizeof(struct ArrowArray)));
1890 3 : psChild->children[0]->n_buffers = 2;
1891 6 : psChild->children[0]->buffers =
1892 3 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1893 3 : psChild->children[0]->release = OGRLayerDefaultReleaseArray;
1894 : int64_t *panTimestamps = static_cast<int64_t *>(
1895 3 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int64_t) * nFeatureCountLimit));
1896 3 : if (panTimestamps == nullptr)
1897 0 : return false;
1898 3 : psChild->children[0]->buffers[1] = panTimestamps;
1899 :
1900 : // Create sub-array for offset to UTC in minutes
1901 6 : psChild->children[1] = static_cast<struct ArrowArray *>(
1902 3 : CPLCalloc(1, sizeof(struct ArrowArray)));
1903 3 : psChild->children[1]->n_buffers = 2;
1904 6 : psChild->children[1]->buffers =
1905 3 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1906 3 : psChild->children[1]->release = OGRLayerDefaultReleaseArray;
1907 : int16_t *panOffsetsMinutes = static_cast<int16_t *>(
1908 3 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int16_t) * nFeatureCountLimit));
1909 3 : if (panOffsetsMinutes == nullptr)
1910 0 : return false;
1911 3 : psChild->children[1]->buffers[1] = panOffsetsMinutes;
1912 :
1913 : struct tm brokenDown;
1914 3 : memset(&brokenDown, 0, sizeof(brokenDown));
1915 :
1916 15 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1917 : {
1918 12 : auto &poFeature = apoFeatures[iFeat];
1919 12 : const auto psRawField = poFeature->GetRawFieldRef(i);
1920 12 : panTimestamps[iFeat] = 0;
1921 12 : panOffsetsMinutes[iFeat] = 0;
1922 12 : if (IsValidField(psRawField))
1923 : {
1924 9 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1925 9 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1926 9 : brokenDown.tm_mday = psRawField->Date.Day;
1927 9 : brokenDown.tm_hour = psRawField->Date.Hour;
1928 9 : brokenDown.tm_min = psRawField->Date.Minute;
1929 9 : brokenDown.tm_sec = static_cast<int>(psRawField->Date.Second);
1930 : auto nVal =
1931 9 : CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
1932 9 : (static_cast<int>(psRawField->Date.Second * 1000 + 0.5f) %
1933 9 : 1000);
1934 9 : if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
1935 9 : psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1936 : {
1937 : // Convert for psRawField->Date.TZFlag to UTC
1938 6 : const int TZOffsetMinute =
1939 6 : (psRawField->Date.TZFlag - OGR_TZFLAG_UTC) * 15;
1940 6 : const int TZOffsetMS = TZOffsetMinute * 60 * 1000;
1941 6 : nVal -= TZOffsetMS;
1942 :
1943 6 : panOffsetsMinutes[iFeat] = static_cast<int16_t>(TZOffsetMinute);
1944 : }
1945 9 : panTimestamps[iFeat] = nVal;
1946 : }
1947 3 : else if (bIsNullable)
1948 : {
1949 3 : ++psChild->null_count;
1950 3 : if (pabyValidity == nullptr)
1951 : {
1952 3 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1953 3 : psChild->buffers[0] = pabyValidity;
1954 3 : if (pabyValidity == nullptr)
1955 0 : return false;
1956 : }
1957 3 : UnsetBit(pabyValidity, iFeat);
1958 : }
1959 : }
1960 3 : return true;
1961 : }
1962 :
1963 : /************************************************************************/
1964 : /* FillDateTimeArrayAsString() */
1965 : /************************************************************************/
1966 :
1967 : static size_t
1968 9 : FillDateTimeArrayAsString(struct ArrowArray *psChild,
1969 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1970 : const size_t nFeatureCountLimit,
1971 : const bool bIsNullable, const int i,
1972 : const size_t nMemLimit)
1973 : {
1974 9 : psChild->n_buffers = 3;
1975 9 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1976 9 : uint8_t *pabyValidity = nullptr;
1977 : using T = uint32_t;
1978 : T *panOffsets = static_cast<T *>(
1979 9 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1980 9 : if (panOffsets == nullptr)
1981 0 : return 0;
1982 9 : psChild->buffers[1] = panOffsets;
1983 :
1984 9 : size_t nOffset = 0;
1985 9 : size_t nFeatCount = 0;
1986 51 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1987 : {
1988 42 : panOffsets[iFeat] = static_cast<T>(nOffset);
1989 42 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1990 42 : if (IsValidField(psRawField))
1991 : {
1992 39 : size_t nLen = strlen("YYYY-MM-DDTHH:MM:SS");
1993 39 : if (fmodf(psRawField->Date.Second, 1.0f) != 0)
1994 27 : nLen += strlen(".sss");
1995 39 : if (psRawField->Date.TZFlag == OGR_TZFLAG_UTC)
1996 7 : nLen += 1; // 'Z'
1997 32 : else if (psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1998 14 : nLen += strlen("+hh:mm");
1999 39 : if (nLen > nMemLimit - nOffset)
2000 : {
2001 0 : if (nFeatCount == 0)
2002 0 : return 0;
2003 0 : break;
2004 : }
2005 39 : nOffset += static_cast<T>(nLen);
2006 : }
2007 3 : else if (bIsNullable)
2008 : {
2009 3 : ++psChild->null_count;
2010 3 : if (pabyValidity == nullptr)
2011 : {
2012 3 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
2013 3 : psChild->buffers[0] = pabyValidity;
2014 3 : if (pabyValidity == nullptr)
2015 0 : return 0;
2016 : }
2017 3 : UnsetBit(pabyValidity, iFeat);
2018 : }
2019 : }
2020 9 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
2021 :
2022 : char *pachValues =
2023 9 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
2024 9 : if (pachValues == nullptr)
2025 0 : return 0;
2026 9 : psChild->buffers[2] = pachValues;
2027 :
2028 9 : nOffset = 0;
2029 : char szBuffer[OGR_SIZEOF_ISO8601_DATETIME_BUFFER];
2030 : OGRISO8601Format sFormat;
2031 9 : sFormat.ePrecision = OGRISO8601Precision::AUTO;
2032 51 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
2033 : {
2034 42 : const int nLen =
2035 42 : static_cast<int>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
2036 42 : if (nLen)
2037 : {
2038 39 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
2039 39 : int nBufSize = OGRGetISO8601DateTime(psRawField, sFormat, szBuffer);
2040 39 : if (nBufSize)
2041 : {
2042 39 : memcpy(pachValues + nOffset, szBuffer,
2043 39 : std::min(nLen, nBufSize));
2044 : }
2045 39 : if (nBufSize < nLen)
2046 : {
2047 5 : memset(pachValues + nOffset + nBufSize, 0, nLen - nBufSize);
2048 : }
2049 39 : nOffset += nLen;
2050 : }
2051 : }
2052 :
2053 9 : return nFeatCount;
2054 : }
2055 :
2056 : /************************************************************************/
2057 : /* GetNextArrowArray() */
2058 : /************************************************************************/
2059 :
2060 : /** Default implementation of the ArrowArrayStream::get_next() callback.
2061 : *
2062 : * To be used by driver implementations that have a custom GetArrowStream()
2063 : * implementation.
2064 : *
2065 : * @since GDAL 3.6
2066 : */
2067 3634 : int OGRLayer::GetNextArrowArray(struct ArrowArrayStream *stream,
2068 : struct ArrowArray *out_array)
2069 : {
2070 3634 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
2071 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2072 : stream->private_data);
2073 :
2074 3634 : const bool bIncludeFID = CPLTestBool(
2075 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
2076 3634 : const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
2077 : GAS_OPT_DATETIME_AS_STRING, false);
2078 3634 : int nMaxBatchSize = atoi(m_aosArrowArrayStreamOptions.FetchNameValueDef(
2079 : "MAX_FEATURES_IN_BATCH", "65536"));
2080 3634 : if (nMaxBatchSize <= 0)
2081 0 : nMaxBatchSize = 1;
2082 3634 : if (nMaxBatchSize > INT_MAX - 1)
2083 0 : nMaxBatchSize = INT_MAX - 1;
2084 :
2085 : auto &oFeatureQueue =
2086 3634 : m_poSharedArrowArrayStreamPrivateData->m_oFeatureQueue;
2087 :
2088 3634 : memset(out_array, 0, sizeof(*out_array));
2089 :
2090 3634 : auto poLayerDefn = GetLayerDefn();
2091 3634 : const int nFieldCount = poLayerDefn->GetFieldCount();
2092 3634 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
2093 3634 : const int nMaxChildren =
2094 3634 : (bIncludeFID ? 1 : 0) + nFieldCount + nGeomFieldCount;
2095 3634 : int iSchemaChild = 0;
2096 :
2097 3634 : if (!m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.empty())
2098 : {
2099 6 : if (poPrivate->poShared->m_bEOF)
2100 : {
2101 2 : return 0;
2102 : }
2103 4 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS == 0)
2104 : {
2105 4 : CPLDebug("OGR", "Using fast FID filtering");
2106 : }
2107 8 : while (
2108 24 : oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize) &&
2109 12 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS <
2110 12 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
2111 : {
2112 : const auto nFID =
2113 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
2114 8 : [m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS];
2115 16 : auto poFeature = std::unique_ptr<OGRFeature>(GetFeature(nFID));
2116 8 : ++m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS;
2117 8 : if (poFeature && (m_poFilterGeom == nullptr ||
2118 0 : FilterGeometry(poFeature->GetGeomFieldRef(
2119 8 : m_iGeomFieldFilter))))
2120 : {
2121 4 : oFeatureQueue.emplace_back(std::move(poFeature));
2122 : }
2123 : }
2124 8 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS ==
2125 4 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
2126 : {
2127 4 : poPrivate->poShared->m_bEOF = true;
2128 : }
2129 : }
2130 3628 : else if (!poPrivate->poShared->m_bEOF)
2131 : {
2132 18978 : while (oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize))
2133 : {
2134 18975 : auto poFeature = std::unique_ptr<OGRFeature>(GetNextFeature());
2135 18975 : if (!poFeature)
2136 : {
2137 1868 : poPrivate->poShared->m_bEOF = true;
2138 1868 : break;
2139 : }
2140 17107 : oFeatureQueue.emplace_back(std::move(poFeature));
2141 : }
2142 : }
2143 3632 : if (oFeatureQueue.empty())
2144 : {
2145 2150 : return 0;
2146 : }
2147 :
2148 1482 : out_array->release = OGRLayerDefaultReleaseArray;
2149 1482 : out_array->null_count = 0;
2150 :
2151 1482 : out_array->n_children = nMaxChildren;
2152 1482 : out_array->children = static_cast<struct ArrowArray **>(
2153 1482 : CPLCalloc(nMaxChildren, sizeof(struct ArrowArray *)));
2154 1482 : out_array->release = OGRLayerDefaultReleaseArray;
2155 1482 : out_array->n_buffers = 1;
2156 1482 : out_array->buffers =
2157 1482 : static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
2158 :
2159 1482 : size_t nFeatureCount = oFeatureQueue.size();
2160 1482 : const uint32_t nMemLimit = OGRArrowArrayHelper::GetMemLimit();
2161 2964 : std::set<int> anArrayIndicesOfStructDateTime;
2162 1482 : if (bIncludeFID)
2163 : {
2164 2696 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2165 1348 : CPLCalloc(1, sizeof(struct ArrowArray)));
2166 1348 : auto psChild = out_array->children[iSchemaChild];
2167 1348 : ++iSchemaChild;
2168 1348 : psChild->release = OGRLayerDefaultReleaseArray;
2169 1348 : psChild->n_buffers = 2;
2170 1348 : psChild->buffers =
2171 1348 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
2172 : int64_t *panValues =
2173 1348 : static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
2174 : sizeof(int64_t) * (oFeatureQueue.size() + 1)));
2175 1348 : if (panValues == nullptr)
2176 0 : goto error;
2177 1348 : psChild->buffers[1] = panValues;
2178 18007 : for (size_t iFeat = 0; iFeat < oFeatureQueue.size(); ++iFeat)
2179 : {
2180 16659 : panValues[iFeat] = oFeatureQueue[iFeat]->GetFID();
2181 : }
2182 : }
2183 :
2184 13718 : for (int i = 0; i < nFieldCount; ++i)
2185 : {
2186 12261 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
2187 12261 : if (poFieldDefn->IsIgnored())
2188 : {
2189 13 : continue;
2190 : }
2191 :
2192 24496 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2193 12248 : CPLCalloc(1, sizeof(struct ArrowArray)));
2194 12248 : auto psChild = out_array->children[iSchemaChild];
2195 12248 : psChild->release = OGRLayerDefaultReleaseArray;
2196 12248 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
2197 12248 : const auto eSubType = poFieldDefn->GetSubType();
2198 12248 : switch (poFieldDefn->GetType())
2199 : {
2200 3528 : case OFTInteger:
2201 : {
2202 3528 : if (eSubType == OFSTBoolean)
2203 : {
2204 138 : if (!FillBoolArray(psChild, oFeatureQueue, nFeatureCount,
2205 : bIsNullable, &OGRField::Integer, i))
2206 0 : goto error;
2207 : }
2208 3390 : else if (eSubType == OFSTInt16)
2209 : {
2210 478 : if (!FillArray<int16_t>(psChild, oFeatureQueue,
2211 : nFeatureCount, bIsNullable,
2212 : &OGRField::Integer, i))
2213 0 : goto error;
2214 : }
2215 : else
2216 : {
2217 2912 : if (!FillArray<int32_t>(psChild, oFeatureQueue,
2218 : nFeatureCount, bIsNullable,
2219 : &OGRField::Integer, i))
2220 0 : goto error;
2221 : }
2222 :
2223 3528 : const auto &osDomainName = poFieldDefn->GetDomainName();
2224 3528 : if (!osDomainName.empty())
2225 : {
2226 13 : auto poDS = GetDataset();
2227 13 : if (poDS)
2228 : {
2229 : const auto poFieldDomain =
2230 13 : poDS->GetFieldDomain(osDomainName);
2231 26 : if (poFieldDomain &&
2232 13 : poFieldDomain->GetDomainType() == OFDT_CODED)
2233 : {
2234 13 : const OGRCodedFieldDomain *poCodedDomain =
2235 : static_cast<const OGRCodedFieldDomain *>(
2236 : poFieldDomain);
2237 13 : OGRArrowArrayHelper::FillDict(psChild,
2238 : poCodedDomain);
2239 : }
2240 : }
2241 : }
2242 :
2243 3528 : break;
2244 : }
2245 :
2246 338 : case OFTInteger64:
2247 : {
2248 338 : if (!FillArray<int64_t>(psChild, oFeatureQueue, nFeatureCount,
2249 : bIsNullable, &OGRField::Integer64, i))
2250 0 : goto error;
2251 338 : break;
2252 : }
2253 :
2254 2103 : case OFTReal:
2255 : {
2256 2103 : if (eSubType == OFSTFloat32)
2257 : {
2258 478 : if (!FillArray<float>(psChild, oFeatureQueue, nFeatureCount,
2259 : bIsNullable, &OGRField::Real, i))
2260 0 : goto error;
2261 : }
2262 : else
2263 : {
2264 1625 : if (!FillArray<double>(psChild, oFeatureQueue,
2265 : nFeatureCount, bIsNullable,
2266 : &OGRField::Real, i))
2267 0 : goto error;
2268 : }
2269 2103 : break;
2270 : }
2271 :
2272 3777 : case OFTString:
2273 : case OFTWideString:
2274 : {
2275 3777 : const size_t nThisFeatureCount = FillStringArray<int32_t>(
2276 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2277 : nMemLimit);
2278 3777 : if (nThisFeatureCount == 0)
2279 : {
2280 19 : goto error_max_mem;
2281 : }
2282 3758 : if (nThisFeatureCount < nFeatureCount)
2283 1 : nFeatureCount = nThisFeatureCount;
2284 3758 : break;
2285 : }
2286 :
2287 913 : case OFTBinary:
2288 : {
2289 913 : const int nWidth = poFieldDefn->GetWidth();
2290 913 : if (nWidth > 0)
2291 : {
2292 8 : if (nFeatureCount > nMemLimit / nWidth)
2293 : {
2294 1 : nFeatureCount = nMemLimit / nWidth;
2295 1 : if (nFeatureCount == 0)
2296 0 : goto error_max_mem;
2297 : }
2298 8 : if (!FillFixedWidthBinaryArray(psChild, oFeatureQueue,
2299 : nFeatureCount, bIsNullable,
2300 : nWidth, i))
2301 0 : goto error;
2302 : }
2303 : else
2304 : {
2305 905 : const size_t nThisFeatureCount = FillBinaryArray<int32_t>(
2306 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2307 : nMemLimit);
2308 905 : if (nThisFeatureCount == 0)
2309 : {
2310 1 : goto error_max_mem;
2311 : }
2312 904 : if (nThisFeatureCount < nFeatureCount)
2313 1 : nFeatureCount = nThisFeatureCount;
2314 : }
2315 912 : break;
2316 : }
2317 :
2318 234 : case OFTIntegerList:
2319 : {
2320 : size_t nThisFeatureCount;
2321 234 : if (eSubType == OFSTBoolean)
2322 : {
2323 : nThisFeatureCount =
2324 49 : FillListArrayBool<int32_t, GetFromIntegerList>(
2325 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2326 : i, nMemLimit);
2327 : }
2328 185 : else if (eSubType == OFSTInt16)
2329 : {
2330 : nThisFeatureCount =
2331 28 : FillListArray<int32_t, int16_t, GetFromIntegerList>(
2332 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2333 : i, nMemLimit);
2334 : }
2335 : else
2336 : {
2337 : nThisFeatureCount =
2338 157 : FillListArray<int32_t, int32_t, GetFromIntegerList>(
2339 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2340 : i, nMemLimit);
2341 : }
2342 234 : if (nThisFeatureCount == 0)
2343 : {
2344 2 : goto error_max_mem;
2345 : }
2346 232 : if (nThisFeatureCount < nFeatureCount)
2347 2 : nFeatureCount = nThisFeatureCount;
2348 232 : break;
2349 : }
2350 :
2351 75 : case OFTInteger64List:
2352 : {
2353 : const size_t nThisFeatureCount =
2354 75 : FillListArray<int32_t, int64_t, GetFromInteger64List>(
2355 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2356 : nMemLimit);
2357 75 : if (nThisFeatureCount == 0)
2358 : {
2359 1 : goto error_max_mem;
2360 : }
2361 74 : if (nThisFeatureCount < nFeatureCount)
2362 1 : nFeatureCount = nThisFeatureCount;
2363 74 : break;
2364 : }
2365 :
2366 156 : case OFTRealList:
2367 : {
2368 : size_t nThisFeatureCount;
2369 156 : if (eSubType == OFSTFloat32)
2370 : {
2371 : nThisFeatureCount =
2372 41 : FillListArray<int32_t, float, GetFromRealList>(
2373 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2374 : i, nMemLimit);
2375 : }
2376 : else
2377 : {
2378 : nThisFeatureCount =
2379 115 : FillListArray<int32_t, double, GetFromRealList>(
2380 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2381 : i, nMemLimit);
2382 : }
2383 156 : if (nThisFeatureCount == 0)
2384 : {
2385 1 : goto error_max_mem;
2386 : }
2387 155 : if (nThisFeatureCount < nFeatureCount)
2388 1 : nFeatureCount = nThisFeatureCount;
2389 155 : break;
2390 : }
2391 :
2392 203 : case OFTStringList:
2393 : case OFTWideStringList:
2394 : {
2395 203 : const size_t nThisFeatureCount = FillStringListArray<int32_t>(
2396 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2397 : nMemLimit);
2398 203 : if (nThisFeatureCount == 0)
2399 : {
2400 1 : goto error_max_mem;
2401 : }
2402 202 : if (nThisFeatureCount < nFeatureCount)
2403 1 : nFeatureCount = nThisFeatureCount;
2404 202 : break;
2405 : }
2406 :
2407 125 : case OFTDate:
2408 : {
2409 125 : if (!FillDateArray(psChild, oFeatureQueue, nFeatureCount,
2410 : bIsNullable, i))
2411 0 : goto error;
2412 125 : break;
2413 : }
2414 :
2415 72 : case OFTTime:
2416 : {
2417 72 : if (!FillTimeArray(psChild, oFeatureQueue, nFeatureCount,
2418 : bIsNullable, i))
2419 0 : goto error;
2420 72 : break;
2421 : }
2422 :
2423 724 : case OFTDateTime:
2424 : {
2425 724 : if (bDateTimeAsString)
2426 : {
2427 9 : const size_t nThisFeatureCount = FillDateTimeArrayAsString(
2428 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2429 : nMemLimit);
2430 9 : if (nThisFeatureCount == 0)
2431 : {
2432 0 : goto error_max_mem;
2433 : }
2434 9 : if (nThisFeatureCount < nFeatureCount)
2435 0 : nFeatureCount = nThisFeatureCount;
2436 : }
2437 : else
2438 : {
2439 : const char *pszTZOverride =
2440 715 : m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
2441 715 : const int nTZFlag = poFieldDefn->GetTZFlag();
2442 715 : if ((pszTZOverride && EQUAL(pszTZOverride, "mixed")) ||
2443 706 : (!pszTZOverride && nTZFlag == OGR_TZFLAG_MIXED_TZ))
2444 :
2445 : {
2446 3 : anArrayIndicesOfStructDateTime.insert(iSchemaChild);
2447 3 : if (!FillDateTimeArrayWithTimeZone(
2448 : psChild, oFeatureQueue, nFeatureCount,
2449 : bIsNullable, i, nTZFlag))
2450 : {
2451 0 : goto error;
2452 : }
2453 : }
2454 712 : else if (!FillDateTimeArray(psChild, oFeatureQueue,
2455 : nFeatureCount, bIsNullable, i,
2456 : nTZFlag))
2457 : {
2458 0 : goto error;
2459 : }
2460 : }
2461 724 : break;
2462 : }
2463 : }
2464 :
2465 12223 : ++iSchemaChild;
2466 : }
2467 2722 : for (int i = 0; i < nGeomFieldCount; ++i)
2468 : {
2469 1265 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
2470 1265 : if (poFieldDefn->IsIgnored())
2471 : {
2472 3 : continue;
2473 : }
2474 :
2475 2524 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2476 1262 : CPLCalloc(1, sizeof(struct ArrowArray)));
2477 1262 : auto psChild = out_array->children[iSchemaChild];
2478 1262 : ++iSchemaChild;
2479 1262 : psChild->release = OGRLayerDefaultReleaseArray;
2480 1262 : psChild->length = oFeatureQueue.size();
2481 1262 : const size_t nThisFeatureCount = FillWKBGeometryArray<int32_t>(
2482 : psChild, oFeatureQueue, nFeatureCount, poFieldDefn, i, nMemLimit);
2483 1262 : if (nThisFeatureCount == 0)
2484 : {
2485 0 : goto error_max_mem;
2486 : }
2487 1262 : if (nThisFeatureCount < nFeatureCount)
2488 1 : nFeatureCount = nThisFeatureCount;
2489 : }
2490 :
2491 : // Remove consumed features from the queue
2492 1457 : if (nFeatureCount == oFeatureQueue.size())
2493 1448 : oFeatureQueue.clear();
2494 : else
2495 : {
2496 27 : for (size_t i = 0; i < nFeatureCount; ++i)
2497 : {
2498 18 : oFeatureQueue.pop_front();
2499 : }
2500 : }
2501 :
2502 1457 : out_array->n_children = iSchemaChild;
2503 1457 : out_array->length = nFeatureCount;
2504 16186 : for (int i = 0; i < out_array->n_children; ++i)
2505 : {
2506 14729 : out_array->children[i]->length = nFeatureCount;
2507 14729 : if (cpl::contains(anArrayIndicesOfStructDateTime, i))
2508 : {
2509 9 : for (int j = 0; j < out_array->children[i]->n_children; ++j)
2510 : {
2511 6 : out_array->children[i]->children[j]->length = nFeatureCount;
2512 : }
2513 : }
2514 : }
2515 :
2516 1457 : return 0;
2517 :
2518 25 : error_max_mem:
2519 25 : CPLError(CE_Failure, CPLE_AppDefined,
2520 : "Too large feature: not even a single feature can be returned");
2521 25 : error:
2522 25 : oFeatureQueue.clear();
2523 25 : poPrivate->poShared->m_bEOF = true;
2524 25 : out_array->release(out_array);
2525 25 : memset(out_array, 0, sizeof(*out_array));
2526 25 : return ENOMEM;
2527 : }
2528 :
2529 : /************************************************************************/
2530 : /* StaticGetNextArrowArray() */
2531 : /************************************************************************/
2532 :
2533 : /** Default implementation of the ArrowArrayStream::get_next() callback.
2534 : *
2535 : * To be used by driver implementations that have a custom GetArrowStream()
2536 : * implementation.
2537 : *
2538 : * @since GDAL 3.6
2539 : */
2540 4628 : int OGRLayer::StaticGetNextArrowArray(struct ArrowArrayStream *stream,
2541 : struct ArrowArray *out_array)
2542 : {
2543 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2544 4628 : stream->private_data)
2545 4628 : ->poShared->m_poLayer;
2546 4628 : if (poLayer == nullptr)
2547 : {
2548 1 : CPLError(CE_Failure, CPLE_NotSupported,
2549 : "Calling get_next() on a freed OGRLayer is not supported");
2550 1 : return EINVAL;
2551 : }
2552 4627 : return poLayer->GetNextArrowArray(stream, out_array);
2553 : }
2554 :
2555 : /************************************************************************/
2556 : /* ReleaseStream() */
2557 : /************************************************************************/
2558 :
2559 : /** Release a ArrowArrayStream.
2560 : *
2561 : * To be used by driver implementations that have a custom GetArrowStream()
2562 : * implementation.
2563 : *
2564 : * @param stream Arrow array stream to release.
2565 : * @since GDAL 3.6
2566 : */
2567 2278 : void OGRLayer::ReleaseStream(struct ArrowArrayStream *stream)
2568 : {
2569 2278 : assert(stream->release == OGRLayer::ReleaseStream);
2570 2278 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
2571 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2572 : stream->private_data);
2573 2278 : poPrivate->poShared->m_bArrowArrayStreamInProgress = false;
2574 2278 : poPrivate->poShared->m_bEOF = false;
2575 2278 : if (poPrivate->poShared->m_poLayer)
2576 2228 : poPrivate->poShared->m_poLayer->ResetReading();
2577 2278 : delete poPrivate;
2578 2278 : stream->private_data = nullptr;
2579 2278 : stream->release = nullptr;
2580 2278 : }
2581 :
2582 : /************************************************************************/
2583 : /* GetLastErrorArrowArrayStream() */
2584 : /************************************************************************/
2585 :
2586 : /** Default implementation of the ArrowArrayStream::get_last_error() callback.
2587 : *
2588 : * To be used by driver implementations that have a custom GetArrowStream()
2589 : * implementation.
2590 : *
2591 : * @since GDAL 3.6
2592 : */
2593 3 : const char *OGRLayer::GetLastErrorArrowArrayStream(struct ArrowArrayStream *)
2594 : {
2595 3 : const char *pszLastErrorMsg = CPLGetLastErrorMsg();
2596 3 : return pszLastErrorMsg[0] != '\0' ? pszLastErrorMsg : nullptr;
2597 : }
2598 :
2599 : /************************************************************************/
2600 : /* GetArrowStream() */
2601 : /************************************************************************/
2602 :
2603 : /** Get an Arrow C stream.
2604 : *
2605 : * On successful return, and when the stream interface is no longer needed, it
2606 : * must be freed with out_stream->release(out_stream). Please carefully
2607 : * read https://arrow.apache.org/docs/format/CStreamInterface.html for more
2608 : * details on using Arrow C stream.
2609 : *
2610 : * The method may take into account ignored fields set with SetIgnoredFields()
2611 : * (the default implementation does), and should take into account filters set
2612 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2613 : * specialized implementations may fallback to the default (slower)
2614 : * implementation when filters are set.
2615 : * Drivers that have a specialized implementation should advertise the
2616 : * OLCFastGetArrowStream capability.
2617 : *
2618 : * There are extra precautions to take into account in a OGR context. Unless
2619 : * otherwise specified by a particular driver implementation, the get_schema(),
2620 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2621 : * structure should no longer be used after the OGRLayer, from which the
2622 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2623 : * dataset closing). The reason is that those function pointers will typically
2624 : * point to methods of the OGRLayer instance.
2625 : * However, the ArrowSchema and ArrowArray structures filled from those
2626 : * callbacks can be used and must be released independently from the
2627 : * ArrowArrayStream or the layer.
2628 : *
2629 : * Furthermore, unless otherwise specified by a particular driver
2630 : * implementation, only one ArrowArrayStream can be active at a time on
2631 : * a given layer (that is the last active one must be explicitly released before
2632 : * a next one is asked). Changing filter state, ignored columns, modifying the
2633 : * schema or using ResetReading()/GetNextFeature() while using a
2634 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2635 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2636 : * should be called on a layer, while an ArrowArrayStream on it is active.
2637 : *
2638 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2639 : * get_schema() callback may be set with the potential following items:
2640 : * <ul>
2641 : * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2642 : * Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2643 : * specified.</li>
2644 : * <li>"GDAL:OGR:alternative_name": value of
2645 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2646 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2647 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2648 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2649 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2650 : * string)</li>
2651 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2652 : * "true" or "false")</li>
2653 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2654 : * </ul>
2655 : *
2656 : * A potential usage can be:
2657 : \code{.cpp}
2658 : struct ArrowArrayStream stream;
2659 : if( !poLayer->GetArrowStream(&stream, nullptr))
2660 : {
2661 : CPLError(CE_Failure, CPLE_AppDefined, "GetArrowStream() failed");
2662 : exit(1);
2663 : }
2664 : struct ArrowSchema schema;
2665 : if( stream.get_schema(&stream, &schema) == 0 )
2666 : {
2667 : // Do something useful
2668 : schema.release(&schema);
2669 : }
2670 : while( true )
2671 : {
2672 : struct ArrowArray array;
2673 : // Look for an error (get_next() returning a non-zero code), or
2674 : // end of iteration (array.release == nullptr)
2675 : if( stream.get_next(&stream, &array) != 0 ||
2676 : array.release == nullptr )
2677 : {
2678 : break;
2679 : }
2680 : // Do something useful
2681 : array.release(&array);
2682 : }
2683 : stream.release(&stream);
2684 : \endcode
2685 : *
2686 : * A full example is available in the
2687 : * <a
2688 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2689 : From OGR using the Arrow C Stream data interface</a> tutorial.
2690 : *
2691 : * Options may be driver specific. The default implementation recognizes the
2692 : * following options:
2693 : * <ul>
2694 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to YES.
2695 : * </li>
2696 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2697 : * a ArrowArray batch. Defaults to 65 536.</li>
2698 : * <li>TIMEZONE="unknown", "mixed", "UTC", "(+|-)HH:MM" or any other value supported by
2699 : * Arrow. (GDAL >= 3.8)
2700 : * Override the timezone flag nominally provided by
2701 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2702 : * declaration, with a user specified timezone.
2703 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2704 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2705 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2706 : * to UTC of a OGRField::Date is only done if both the timezone indicated by
2707 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2708 : * this TIMEZONE option) are not unknown.
2709 : * Since GDAL 3.13, "mixed" can be used to create an Arrow structure field,
2710 : * following the "timestamp with offset" extension (https://github.com/apache/arrow/blob/main/docs/source/format/CanonicalExtensions.rst#timestamp-with-offset)
2711 : * and storing both a UTC timestamp and the offset in minutes from the UTC
2712 : * timezone.
2713 : * </li>
2714 : * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2715 : * Whether DateTime fields should be returned as a (normally ISO-8601
2716 : * formatted) string by drivers. The aim is to be able to handle mixed
2717 : * timezones (or timezone naive values) in the same column.
2718 : * All drivers must honour that option, and potentially fallback to the
2719 : * OGRLayer generic implementation if they cannot (which is the case for the
2720 : * Arrow, Parquet and ADBC drivers).
2721 : * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2722 : * </li>
2723 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2724 : * The default is OGC, which will lead to setting
2725 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2726 : * If GEOMETRY_METADATA_ENCODING is set to GEOARROW,
2727 : * ARROW:extension:name=geoarrow.wkb and
2728 : * ARROW:extension:metadata={"crs": <projjson CRS representation>} are set.
2729 : * </li>
2730 : * </ul>
2731 : *
2732 : * The Arrow/Parquet drivers recognize the following option:
2733 : * <ul>
2734 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2735 : * when the native geometry encoding is not WKB. Otherwise the geometry
2736 : * will be returned with its native Arrow encoding
2737 : * (possibly using GeoArrow encoding).</li>
2738 : * </ul>
2739 : *
2740 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2741 : * structure does not need to be initialized.
2742 : * @param papszOptions NULL terminated list of key=value options.
2743 : * @return true in case of success.
2744 : * @since GDAL 3.6
2745 : */
2746 2282 : bool OGRLayer::GetArrowStream(struct ArrowArrayStream *out_stream,
2747 : CSLConstList papszOptions)
2748 : {
2749 2282 : memset(out_stream, 0, sizeof(*out_stream));
2750 3858 : if (m_poSharedArrowArrayStreamPrivateData &&
2751 3858 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress)
2752 : {
2753 4 : CPLError(CE_Failure, CPLE_AppDefined,
2754 : "An arrow Arrow Stream is in progress on that layer. Only "
2755 : "one at a time is allowed in this implementation.");
2756 4 : return false;
2757 : }
2758 2278 : m_aosArrowArrayStreamOptions.Assign(CSLDuplicate(papszOptions), true);
2759 :
2760 2278 : out_stream->get_schema = OGRLayer::StaticGetArrowSchema;
2761 2278 : out_stream->get_next = OGRLayer::StaticGetNextArrowArray;
2762 2278 : out_stream->get_last_error = OGRLayer::GetLastErrorArrowArrayStream;
2763 2278 : out_stream->release = OGRLayer::ReleaseStream;
2764 :
2765 2278 : if (m_poSharedArrowArrayStreamPrivateData == nullptr)
2766 : {
2767 : m_poSharedArrowArrayStreamPrivateData =
2768 706 : std::make_shared<ArrowArrayStreamPrivateData>();
2769 706 : m_poSharedArrowArrayStreamPrivateData->m_poLayer = this;
2770 : }
2771 2278 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress = true;
2772 :
2773 : // Special case for "FID = constant", or "FID IN (constant1, ...., constantN)"
2774 2278 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.clear();
2775 2278 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS = 0;
2776 2278 : if (m_poAttrQuery)
2777 : {
2778 : swq_expr_node *poNode =
2779 1080 : static_cast<swq_expr_node *>(m_poAttrQuery->GetSWQExpr());
2780 3240 : if (poNode->eNodeType == SNT_OPERATION &&
2781 1080 : (poNode->nOperation == SWQ_IN || poNode->nOperation == SWQ_EQ) &&
2782 845 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
2783 292 : poNode->papoSubExpr[0]->field_index ==
2784 2169 : GetLayerDefn()->GetFieldCount() + SPF_FID &&
2785 9 : TestCapability(OLCRandomRead))
2786 : {
2787 8 : std::set<GIntBig> oSetAlreadyListed;
2788 13 : for (int i = 1; i < poNode->nSubExprCount; ++i)
2789 : {
2790 27 : if (poNode->papoSubExpr[i]->eNodeType == SNT_CONSTANT &&
2791 18 : poNode->papoSubExpr[i]->field_type == SWQ_INTEGER64 &&
2792 9 : oSetAlreadyListed.find(poNode->papoSubExpr[i]->int_value) ==
2793 18 : oSetAlreadyListed.end())
2794 : {
2795 8 : oSetAlreadyListed.insert(poNode->papoSubExpr[i]->int_value);
2796 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
2797 8 : .push_back(poNode->papoSubExpr[i]->int_value);
2798 : }
2799 : }
2800 : }
2801 : }
2802 :
2803 2278 : auto poPrivateData = new ArrowArrayStreamPrivateDataSharedDataWrapper();
2804 2278 : poPrivateData->poShared = m_poSharedArrowArrayStreamPrivateData;
2805 2278 : out_stream->private_data = poPrivateData;
2806 2278 : return true;
2807 : }
2808 :
2809 : /************************************************************************/
2810 : /* OGR_L_GetArrowStream() */
2811 : /************************************************************************/
2812 :
2813 : /** Get an Arrow C stream.
2814 : *
2815 : * On successful return, and when the stream interface is no longer needed, it
2816 : * must be freed with out_stream->release(out_stream). Please carefully read
2817 : * https://arrow.apache.org/docs/format/CStreamInterface.html for more details
2818 : * on using Arrow C stream.
2819 : *
2820 : * The method may take into account ignored fields set with SetIgnoredFields()
2821 : * (the default implementation does), and should take into account filters set
2822 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2823 : * specialized implementations may fallback to the default (slower)
2824 : * implementation when filters are set.
2825 : * Drivers that have a specialized implementation should
2826 : * advertise the OLCFastGetArrowStream capability.
2827 : *
2828 : * There are extra precautions to take into account in a OGR context. Unless
2829 : * otherwise specified by a particular driver implementation, the get_schema(),
2830 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2831 : * structure should no longer be used after the OGRLayer, from which the
2832 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2833 : * dataset closing). The reason is that those function pointers will typically
2834 : * point to methods of the OGRLayer instance.
2835 : * However, the ArrowSchema and ArrowArray structures filled from those
2836 : * callbacks can be used and must be released independently from the
2837 : * ArrowArrayStream or the layer.
2838 : *
2839 : * Furthermore, unless otherwise specified by a particular driver
2840 : * implementation, only one ArrowArrayStream can be active at a time on
2841 : * a given layer (that is the last active one must be explicitly released before
2842 : * a next one is asked). Changing filter state, ignored columns, modifying the
2843 : * schema or using ResetReading()/GetNextFeature() while using a
2844 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2845 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2846 : * should be called on a layer, while an ArrowArrayStream on it is active.
2847 : *
2848 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2849 : * get_schema() callback may be set with the potential following items:
2850 : * <ul>
2851 : * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2852 : * Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2853 : * specified.</li>
2854 : * <li>"GDAL:OGR:alternative_name": value of
2855 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2856 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2857 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2858 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2859 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2860 : * string)</li>
2861 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2862 : * "true" or "false")</li>
2863 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2864 : * </ul>
2865 : *
2866 : * A potential usage can be:
2867 : \code{.cpp}
2868 : struct ArrowArrayStream stream;
2869 : if( !OGR_L_GetArrowStream(hLayer, &stream, nullptr))
2870 : {
2871 : CPLError(CE_Failure, CPLE_AppDefined,
2872 : "OGR_L_GetArrowStream() failed");
2873 : exit(1);
2874 : }
2875 : struct ArrowSchema schema;
2876 : if( stream.get_schema(&stream, &schema) == 0 )
2877 : {
2878 : // Do something useful
2879 : schema.release(&schema);
2880 : }
2881 : while( true )
2882 : {
2883 : struct ArrowArray array;
2884 : // Look for an error (get_next() returning a non-zero code), or
2885 : // end of iteration (array.release == nullptr)
2886 : if( stream.get_next(&stream, &array) != 0 ||
2887 : array.release == nullptr )
2888 : {
2889 : break;
2890 : }
2891 : // Do something useful
2892 : array.release(&array);
2893 : }
2894 : stream.release(&stream);
2895 : \endcode
2896 : *
2897 : * A full example is available in the
2898 : * <a
2899 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2900 : From OGR using the Arrow C Stream data interface</a> tutorial.
2901 : *
2902 : * Options may be driver specific. The default implementation recognizes the
2903 : * following options:
2904 : * <ul>
2905 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to
2906 : YES.</li>
2907 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2908 : * a ArrowArray batch. Defaults to 65 536.</li>
2909 : * <li>TIMEZONE="unknown", "mixed", "UTC", "(+|-)HH:MM" or any other value supported by
2910 : * Arrow. (GDAL >= 3.8)
2911 : * Override the timezone flag nominally provided by
2912 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2913 : * declaration, with a user specified timezone.
2914 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2915 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2916 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2917 : * to UTC of a OGRField::Date is only done if both the timezone indicated by
2918 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2919 : * this TIMEZONE option) are not unknown.
2920 : * Since GDAL 3.13, "mixed" can be used to create an Arrow structure field,
2921 : * following the "timestamp with offset" extension (https://github.com/apache/arrow/blob/main/docs/source/format/CanonicalExtensions.rst#timestamp-with-offset)
2922 : * and storing both a UTC timestamp and the offset in minutes from the UTC
2923 : * timezone.
2924 : * </li>
2925 : * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2926 : * Whether DateTime fields should be returned as a (normally ISO-8601
2927 : * formatted) string by drivers. The aim is to be able to handle mixed
2928 : * timezones (or timezone naive values) in the same column.
2929 : * All drivers must honour that option, and potentially fallback to the
2930 : * OGRLayer generic implementation if they cannot (which is the case for the
2931 : * Arrow, Parquet and ADBC drivers).
2932 : * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2933 : * </li>
2934 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2935 : * The default is OGC, which will lead to setting
2936 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2937 : * If GEOMETRY_METADATA_ENCODING is set to GEOARROW,
2938 : * ARROW:extension:name=geoarrow.wkb and
2939 : * ARROW:extension:metadata={"crs": <projjson CRS representation>} are set.
2940 : * </li>
2941 : * </ul>
2942 : *
2943 : * The Arrow/Parquet drivers recognize the following option:
2944 : * <ul>
2945 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2946 : * when the native geometry encoding is not WKB. Otherwise the geometry
2947 : * will be returned with its native Arrow encoding
2948 : * (possibly using GeoArrow encoding).</li>
2949 : * </ul>
2950 : *
2951 : * @param hLayer Layer
2952 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2953 : * structure does not need to be initialized.
2954 : * @param papszOptions NULL terminated list of key=value options.
2955 : * @return true in case of success.
2956 : * @since GDAL 3.6
2957 : */
2958 375 : bool OGR_L_GetArrowStream(OGRLayerH hLayer, struct ArrowArrayStream *out_stream,
2959 : CSLConstList papszOptions)
2960 : {
2961 375 : VALIDATE_POINTER1(hLayer, "OGR_L_GetArrowStream", false);
2962 375 : VALIDATE_POINTER1(out_stream, "OGR_L_GetArrowStream", false);
2963 :
2964 750 : return OGRLayer::FromHandle(hLayer)->GetArrowStream(out_stream,
2965 375 : papszOptions);
2966 : }
2967 :
2968 : /************************************************************************/
2969 : /* OGRParseArrowMetadata() */
2970 : /************************************************************************/
2971 :
/** Decode the binary metadata blob of an ArrowSchema into a key/value map.
 *
 * The blob is read as: a native-endian int32 number of key/value pairs, then
 * for each pair an int32 key length, the key bytes, an int32 value length and
 * the value bytes (no nul terminators).
 *
 * @param pabyMetadata Metadata blob (typically ArrowSchema::metadata). May be
 *                     NULL, in which case an empty map is returned.
 * @return the decoded key/value pairs. When a key appears several times, the
 *         last value wins. Decoding stops at the first negative length,
 *         returning the pairs decoded so far.
 */
std::map<std::string, std::string>
OGRParseArrowMetadata(const char *pabyMetadata)
{
    std::map<std::string, std::string> oMetadata;

    // The metadata member of an ArrowSchema is optional.
    if (pabyMetadata == nullptr)
        return oMetadata;

    // Read one int32 at the current position and advance past it.
    // memcpy() is used as the blob has no alignment guarantee.
    const auto ReadInt32 = [&pabyMetadata]()
    {
        int32_t nVal = 0;
        memcpy(&nVal, pabyMetadata, sizeof(nVal));
        pabyMetadata += sizeof(nVal);
        return nVal;
    };

    const int32_t nKVP = ReadInt32();
    for (int32_t i = 0; i < nKVP; ++i)
    {
        const int32_t nSizeKey = ReadInt32();
        // A negative length would be converted to a huge size_t below.
        if (nSizeKey < 0)
            break;
        std::string osKey(pabyMetadata, static_cast<size_t>(nSizeKey));
        pabyMetadata += nSizeKey;

        const int32_t nSizeValue = ReadInt32();
        if (nSizeValue < 0)
            break;
        std::string osValue(pabyMetadata, static_cast<size_t>(nSizeValue));
        pabyMetadata += nSizeValue;

        oMetadata[std::move(osKey)] = std::move(osValue);
    }

    return oMetadata;
}
3000 :
3001 : /************************************************************************/
3002 : /* GetStringAsStringView() */
3003 : /************************************************************************/
3004 :
3005 : template <typename OffsetType>
3006 125 : static std::string_view GetStringAsStringView(const struct ArrowArray *array,
3007 : const size_t nIdx)
3008 : {
3009 125 : const OffsetType *panOffsets =
3010 125 : static_cast<const OffsetType *>(array->buffers[1]) +
3011 125 : static_cast<size_t>(array->offset) + nIdx;
3012 125 : const char *pchStr = reinterpret_cast<const char *>(array->buffers[2]);
3013 : if constexpr (std::is_same_v<OffsetType, uint64_t>)
3014 : {
3015 82 : if (panOffsets[1] - panOffsets[0] >
3016 41 : std::numeric_limits<size_t>::max() - 1)
3017 : {
3018 0 : CPLError(CE_Failure, CPLE_AppDefined, "Too large string");
3019 0 : return std::string_view();
3020 : }
3021 : }
3022 125 : return std::string_view(pchStr + static_cast<size_t>(panOffsets[0]),
3023 125 : static_cast<size_t>(panOffsets[1] - panOffsets[0]));
3024 : }
3025 :
3026 : /************************************************************************/
3027 : /* GetStringView() */
3028 : /************************************************************************/
3029 :
3030 0 : static std::string_view GetStringView(const struct ArrowArray *array,
3031 : const size_t nIdx)
3032 : {
3033 : // Cf https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout
3034 0 : const uint32_t *panStringView =
3035 0 : static_cast<const uint32_t *>(array->buffers[1]) +
3036 0 : (static_cast<size_t>(array->offset) + nIdx) * N_VALUES_PER_STRING_VIEW;
3037 0 : constexpr int IDX_LENGTH = 0;
3038 0 : constexpr int IDX_PREFIX_OR_DATA = 1;
3039 0 : constexpr int IDX_BUFFER_IDX = 2;
3040 0 : constexpr int IDX_OFFSET = 3;
3041 0 : const uint32_t nLength = panStringView[IDX_LENGTH];
3042 0 : const char *pchPrefixOrStr =
3043 : reinterpret_cast<const char *>(panStringView + IDX_PREFIX_OR_DATA);
3044 0 : if (nLength <= 12)
3045 : {
3046 0 : return std::string_view(pchPrefixOrStr, nLength);
3047 : }
3048 : else
3049 : {
3050 0 : const uint32_t nBufferIdx = panStringView[IDX_BUFFER_IDX];
3051 0 : const uint32_t nOffset = panStringView[IDX_OFFSET];
3052 0 : constexpr int BASE_BUFFER_IDX = 2;
3053 0 : CPLAssert(BASE_BUFFER_IDX + nBufferIdx < array->n_buffers);
3054 0 : std::string_view s(static_cast<const char *>(
3055 0 : array->buffers[BASE_BUFFER_IDX + nBufferIdx]) +
3056 0 : nOffset,
3057 0 : nLength);
3058 : #ifdef DEBUG
3059 : // cppcheck-suppress unreadVariable
3060 0 : constexpr int PREFIX_LENGTH = 4;
3061 0 : CPLAssert(memcmp(s.data(), pchPrefixOrStr, PREFIX_LENGTH) == 0);
3062 : #endif
3063 0 : return s;
3064 : }
3065 : }
3066 :
3067 : /************************************************************************/
3068 : /* ParseDecimalFormat() */
3069 : /************************************************************************/
3070 :
/** Parse an Arrow decimal format string.
 *
 * Recognized forms:
 * - "d:19,10"     ==> decimal128 [precision 19, scale 10]
 * - "d:19,10,NNN" ==> decimal of bit width NNN [precision 19, scale 10]
 *
 * @param format Arrow format string, expected to start with "d:".
 * @param[out] nPrecision Parsed precision (0 on failure before parsing).
 * @param[out] nScale Parsed scale (0 on failure before parsing).
 * @param[out] nWidthInBytes Width of a value in bytes: 16 when no bit width
 *                           is given, 0 on failure.
 * @return true on success. false if the string is too short, has no comma
 *         after the precision, or if the explicit bit width is not a
 *         strictly positive multiple of 8.
 */
static bool ParseDecimalFormat(const char *format, int &nPrecision, int &nScale,
                               int &nWidthInBytes)
{
    nPrecision = 0;
    nScale = 0;
    nWidthInBytes = 128 / 8;  // 128 bit

    // The values start at format + 2: make sure the string is that long
    // before reading past its first two characters.
    if (format[0] == '\0' || format[1] == '\0')
    {
        nWidthInBytes = 0;
        return false;
    }

    const char *pszFirstComma = strchr(format + 2, ',');
    if (!pszFirstComma)
    {
        // shouldn't happen for well-formed schemas
        nWidthInBytes = 0;
        return false;
    }

    nPrecision = atoi(format + 2);
    nScale = atoi(pszFirstComma + 1);

    const char *pszSecondComma = strchr(pszFirstComma + 1, ',');
    if (pszSecondComma)
    {
        const int nWidthInBits = atoi(pszSecondComma + 1);
        // Reject zero and negative widths too: they would otherwise be
        // reported as a successful parse with a non-positive byte width.
        if (nWidthInBits <= 0 || (nWidthInBits % 8) != 0)
        {
            // shouldn't happen for well-formed schemas
            nWidthInBytes = 0;
            return false;
        }
        nWidthInBytes = nWidthInBits / 8;
    }
    return true;
}
3108 :
3109 : /************************************************************************/
3110 : /* GetErrorIfUnsupportedDecimal() */
3111 : /************************************************************************/
3112 :
/** Return an error message if a decimal field of the given byte width and
 * precision is not supported, or nullptr if it is supported.
 *
 * The width is checked before the precision.
 */
static const char *GetErrorIfUnsupportedDecimal(int nWidthInBytes,
                                                int nPrecision)
{
    constexpr int DECIMAL128_WIDTH_IN_BYTES = 128 / 8;
    constexpr int DECIMAL256_WIDTH_IN_BYTES = 256 / 8;
    // precision=19 fits on 64 bits
    constexpr int MAX_SUPPORTED_PRECISION = 19;

    const bool bWidthSupported = nWidthInBytes == DECIMAL128_WIDTH_IN_BYTES ||
                                 nWidthInBytes == DECIMAL256_WIDTH_IN_BYTES;
    if (!bWidthSupported)
        return "For decimal field, only width 128 and 256 are supported";

    const bool bPrecisionSupported =
        nPrecision >= 1 && nPrecision <= MAX_SUPPORTED_PRECISION;
    if (!bPrecisionSupported)
        return "For decimal field, only precision up to 19 is supported";

    return nullptr;
}
3130 :
3131 : /************************************************************************/
3132 : /* IsArrowTimeStampWithOffsetField() */
3133 : /************************************************************************/
3134 :
3135 1260 : static bool IsArrowTimeStampWithOffsetField(const struct ArrowSchema *schema)
3136 : {
3137 : bool ret =
3138 3377 : IsStructure(schema->format) && schema->n_children == 2 &&
3139 857 : IsTimestamp(schema->children[0]->format) &&
3140 0 : IsInt16(schema->children[1]->format) &&
3141 2520 : strcmp(schema->children[0]->name, ATSWO_TIMESTAMP_FIELD_NAME) == 0 &&
3142 0 : strcmp(schema->children[1]->name, ATSWO_OFFSET_MINUTES_FIELD_NAME) == 0;
3143 1260 : if (ret)
3144 : {
3145 0 : const auto oMetadata = OGRParseArrowMetadata(schema->metadata);
3146 0 : const auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
3147 0 : ret = oIter != oMetadata.end() &&
3148 0 : oIter->second == EXTENSION_NAME_ARROW_TIMESTAMP_WITH_OFFSET;
3149 : }
3150 1260 : return ret;
3151 : }
3152 :
3153 : /************************************************************************/
3154 : /* IsHandledSchema() */
3155 : /************************************************************************/
3156 :
3157 15760 : static bool IsHandledSchema(bool bTopLevel, const struct ArrowSchema *schema,
3158 : const std::string &osPrefix, bool bHasAttrQuery,
3159 : const CPLStringList &aosUsedFields)
3160 : {
3161 15760 : const char *format = schema->format;
3162 15760 : if (IsStructure(format))
3163 : {
3164 1246 : if (IsArrowTimeStampWithOffsetField(schema) &&
3165 1246 : aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3166 : {
3167 0 : return false;
3168 : }
3169 :
3170 12285 : for (int64_t i = 0; i < schema->n_children; ++i)
3171 : {
3172 44156 : if (!IsHandledSchema(/* bTopLevel = */ false,
3173 11039 : schema->children[static_cast<size_t>(i)],
3174 24772 : bTopLevel ? std::string()
3175 13733 : : osPrefix + schema->name + ".",
3176 : bHasAttrQuery, aosUsedFields))
3177 : {
3178 0 : return false;
3179 : }
3180 : }
3181 1246 : return true;
3182 : }
3183 :
3184 : // Lists or maps
3185 25169 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format) ||
3186 10655 : IsMap(format))
3187 : {
3188 4566 : if (!IsHandledSchema(/* bTopLevel = */ false, schema->children[0],
3189 : osPrefix, bHasAttrQuery, aosUsedFields))
3190 : {
3191 0 : return false;
3192 : }
3193 : // For now, we can't filter on lists or maps
3194 4566 : if (aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3195 : {
3196 0 : CPLDebug("OGR",
3197 : "Field %s has unhandled format '%s' for an "
3198 : "attribute to filter on",
3199 0 : (osPrefix + schema->name).c_str(), format);
3200 0 : return false;
3201 : }
3202 4566 : return true;
3203 : }
3204 :
3205 9948 : const char *const apszHandledFormats[] = {
3206 : "b", // boolean
3207 : "c", // int8
3208 : "C", // uint8
3209 : "s", // int16
3210 : "S", // uint16
3211 : "i", // int32
3212 : "I", // uint32
3213 : "l", // int64
3214 : "L", // uint64
3215 : "e", // float16
3216 : "f", // float32
3217 : "g", // float64,
3218 : "z", // binary
3219 : "Z", // large binary
3220 : "u", // UTF-8 string
3221 : "U", // large UTF-8 string
3222 : "tdD", // date32[days]
3223 : "tdm", // date64[milliseconds]
3224 : "tts", //time32 [seconds]
3225 : "ttm", //time32 [milliseconds]
3226 : "ttu", //time64 [microseconds]
3227 : "ttn", //time64 [nanoseconds]
3228 : };
3229 :
3230 115231 : for (const char *pszHandledFormat : apszHandledFormats)
3231 : {
3232 113923 : if (strcmp(format, pszHandledFormat) == 0)
3233 : {
3234 8640 : return true;
3235 : }
3236 : }
3237 :
3238 1308 : if (IsDecimal(format))
3239 : {
3240 790 : if (bHasAttrQuery &&
3241 790 : aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3242 : {
3243 2 : int nPrecision = 0;
3244 2 : int nScale = 0;
3245 2 : int nWidthInBytes = 0;
3246 2 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
3247 : {
3248 0 : CPLDebug("OGR", "%s",
3249 0 : (std::string("Invalid field format ") + format +
3250 0 : " for field " + osPrefix + schema->name)
3251 : .c_str());
3252 0 : return false;
3253 : }
3254 :
3255 : const char *pszError =
3256 2 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
3257 2 : if (pszError)
3258 : {
3259 0 : CPLDebug("OGR", "%s", pszError);
3260 0 : return false;
3261 : }
3262 : }
3263 412 : return true;
3264 : }
3265 :
3266 896 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
3267 : {
3268 896 : return true;
3269 : }
3270 :
3271 0 : CPLDebug("OGR", "Field %s has unhandled format '%s'",
3272 0 : (osPrefix + schema->name).c_str(), format);
3273 0 : return false;
3274 : }
3275 :
3276 : /************************************************************************/
3277 : /* OGRLayer::CanPostFilterArrowArray() */
3278 : /************************************************************************/
3279 :
3280 : /** Whether the PostFilterArrowArray() can work on the schema to remove
3281 : * rows that aren't selected by the spatial or attribute filter.
3282 : */
3283 155 : bool OGRLayer::CanPostFilterArrowArray(const struct ArrowSchema *schema) const
3284 : {
3285 155 : if (!IsHandledSchema(
3286 155 : /* bTopLevel=*/true, schema, std::string(),
3287 155 : m_poAttrQuery != nullptr,
3288 310 : m_poAttrQuery ? CPLStringList(m_poAttrQuery->GetUsedFields())
3289 : : CPLStringList()))
3290 : {
3291 0 : return false;
3292 : }
3293 :
3294 155 : if (m_poFilterGeom)
3295 : {
3296 22 : bool bFound = false;
3297 : const char *pszGeomFieldName =
3298 : const_cast<OGRLayer *>(this)
3299 22 : ->GetLayerDefn()
3300 22 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
3301 22 : ->GetNameRef();
3302 839 : for (int64_t i = 0; i < schema->n_children; ++i)
3303 : {
3304 839 : const auto fieldSchema = schema->children[i];
3305 839 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
3306 : {
3307 23 : if (!IsBinary(fieldSchema->format) &&
3308 1 : !IsLargeBinary(fieldSchema->format))
3309 : {
3310 1 : CPLDebug("OGR", "Geometry field %s has handled format '%s'",
3311 : fieldSchema->name, fieldSchema->format);
3312 1 : return false;
3313 : }
3314 :
3315 : // Check if ARROW:extension:name = ogc.wkb
3316 21 : const char *pabyMetadata = fieldSchema->metadata;
3317 21 : if (!pabyMetadata)
3318 : {
3319 0 : CPLDebug(
3320 : "OGR",
3321 : "Geometry field %s lacks metadata in its schema field",
3322 : fieldSchema->name);
3323 0 : return false;
3324 : }
3325 :
3326 21 : const auto oMetadata = OGRParseArrowMetadata(pabyMetadata);
3327 21 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
3328 21 : if (oIter == oMetadata.end())
3329 : {
3330 0 : CPLDebug("OGR",
3331 : "Geometry field %s lacks "
3332 : "%s metadata "
3333 : "in its schema field",
3334 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY);
3335 0 : return false;
3336 : }
3337 21 : if (oIter->second != EXTENSION_NAME_OGC_WKB &&
3338 0 : oIter->second != EXTENSION_NAME_GEOARROW_WKB)
3339 : {
3340 0 : CPLDebug("OGR",
3341 : "Geometry field %s has unexpected "
3342 : "%s = '%s' metadata "
3343 : "in its schema field",
3344 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY,
3345 0 : oIter->second.c_str());
3346 0 : return false;
3347 : }
3348 :
3349 21 : bFound = true;
3350 21 : break;
3351 : }
3352 : }
3353 21 : if (!bFound)
3354 : {
3355 0 : CPLDebug("OGR", "Cannot find geometry field %s in schema",
3356 : pszGeomFieldName);
3357 0 : return false;
3358 : }
3359 : }
3360 :
3361 154 : return true;
3362 : }
3363 :
#if 0
/************************************************************************/
/*                        CheckValidityBuffer()                         */
/************************************************************************/

/** Debugging helper (disabled): assert that array->null_count, when known,
 * matches the number of unset bits of the validity buffer over
 * [offset, offset + length).
 */
static void CheckValidityBuffer(const struct ArrowArray *array)
{
    // Nothing to check when the null count is unknown
    if (array->null_count < 0)
        return;

    const auto *pabyValidity = static_cast<const uint8_t *>(
        const_cast<const void *>(array->buffers[0]));
    if (pabyValidity == nullptr)
    {
        // No validity buffer: no null value is expected
        CPLAssert(array->null_count == 0);
        return;
    }

    const size_t nFirstBit = static_cast<size_t>(array->offset);
    const size_t nBitCount = static_cast<size_t>(array->length);
    size_t nNullsFound = 0;
    for (size_t iBit = 0; iBit < nBitCount; ++iBit)
    {
        if (!TestBit(pabyValidity, nFirstBit + iBit))
            ++nNullsFound;
    }
    CPLAssert(static_cast<size_t>(array->null_count) == nNullsFound);
}
#endif
3390 :
3391 : /************************************************************************/
3392 : /* CompactValidityBuffer() */
3393 : /************************************************************************/
3394 :
3395 7682 : static void CompactValidityBuffer(
3396 : const struct ArrowSchema *, struct ArrowArray *array, size_t iStart,
3397 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3398 : {
3399 7682 : if (array->null_count <= 0)
3400 : {
3401 4186 : return;
3402 : }
3403 :
3404 : // Invalidate null_count as the same validity buffer may be used when
3405 : // scrolling batches, and this creates confusion if we try to set it
3406 : // to different values among the batches
3407 3496 : array->null_count = -1;
3408 :
3409 3496 : CPLAssert(static_cast<size_t>(array->length) >=
3410 : iStart + abyValidityFromFilters.size());
3411 3496 : uint8_t *pabyValidity =
3412 3496 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[0]));
3413 3496 : CPLAssert(pabyValidity);
3414 3496 : const size_t nLength = abyValidityFromFilters.size();
3415 3496 : const size_t nOffset = static_cast<size_t>(array->offset);
3416 3496 : size_t j = iStart + nOffset;
3417 12883 : for (size_t i = 0; i < nLength && j < nNewLength + nOffset; ++i)
3418 : {
3419 9387 : if (abyValidityFromFilters[i])
3420 : {
3421 5823 : if (TestBit(pabyValidity, i + iStart + nOffset))
3422 4387 : SetBit(pabyValidity, j);
3423 : else
3424 1436 : UnsetBit(pabyValidity, j);
3425 5823 : ++j;
3426 : }
3427 : }
3428 : }
3429 :
3430 : /************************************************************************/
3431 : /* CompactBoolArray() */
3432 : /************************************************************************/
3433 :
3434 224 : static void CompactBoolArray(const struct ArrowSchema *schema,
3435 : struct ArrowArray *array, size_t iStart,
3436 : const std::vector<bool> &abyValidityFromFilters,
3437 : size_t nNewLength)
3438 : {
3439 224 : CPLAssert(array->n_children == 0);
3440 224 : CPLAssert(array->n_buffers == 2);
3441 224 : CPLAssert(static_cast<size_t>(array->length) >=
3442 : iStart + abyValidityFromFilters.size());
3443 :
3444 224 : const size_t nLength = abyValidityFromFilters.size();
3445 224 : const size_t nOffset = static_cast<size_t>(array->offset);
3446 224 : uint8_t *pabyData =
3447 224 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[1]));
3448 224 : size_t j = iStart + nOffset;
3449 1147 : for (size_t i = 0; i < nLength; ++i)
3450 : {
3451 923 : if (abyValidityFromFilters[i])
3452 : {
3453 424 : if (TestBit(pabyData, i + iStart + nOffset))
3454 199 : SetBit(pabyData, j);
3455 : else
3456 225 : UnsetBit(pabyData, j);
3457 :
3458 424 : ++j;
3459 : }
3460 : }
3461 :
3462 224 : if (schema->flags & ARROW_FLAG_NULLABLE)
3463 224 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3464 : nNewLength);
3465 :
3466 224 : array->length = nNewLength;
3467 224 : }
3468 :
3469 : /************************************************************************/
3470 : /* CompactPrimitiveArray() */
3471 : /************************************************************************/
3472 :
3473 : template <class T>
3474 3575 : static void CompactPrimitiveArray(
3475 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3476 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3477 : {
3478 3575 : CPLAssert(array->n_children == 0);
3479 3575 : CPLAssert(array->n_buffers == 2);
3480 3575 : CPLAssert(static_cast<size_t>(array->length) >=
3481 : iStart + abyValidityFromFilters.size());
3482 :
3483 3575 : const size_t nLength = abyValidityFromFilters.size();
3484 3575 : const size_t nOffset = static_cast<size_t>(array->offset);
3485 3575 : T *paData =
3486 3575 : static_cast<T *>(const_cast<void *>(array->buffers[1])) + nOffset;
3487 3575 : size_t j = iStart;
3488 18134 : for (size_t i = 0; i < nLength; ++i)
3489 : {
3490 14559 : if (abyValidityFromFilters[i])
3491 : {
3492 6366 : paData[j] = paData[i + iStart];
3493 6366 : ++j;
3494 : }
3495 : }
3496 :
3497 3575 : if (schema->flags & ARROW_FLAG_NULLABLE)
3498 3564 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3499 : nNewLength);
3500 :
3501 3575 : array->length = nNewLength;
3502 3575 : }
3503 :
3504 : /************************************************************************/
3505 : /* CompactStringOrBinaryArray() */
3506 : /************************************************************************/
3507 :
3508 : template <class OffsetType>
3509 1187 : static void CompactStringOrBinaryArray(
3510 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3511 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3512 : {
3513 1187 : CPLAssert(array->n_children == 0);
3514 1187 : CPLAssert(array->n_buffers == 3);
3515 1187 : CPLAssert(static_cast<size_t>(array->length) >=
3516 : iStart + abyValidityFromFilters.size());
3517 :
3518 1187 : const size_t nLength = abyValidityFromFilters.size();
3519 1187 : const size_t nOffset = static_cast<size_t>(array->offset);
3520 1187 : OffsetType *panOffsets =
3521 1187 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3522 : nOffset;
3523 1187 : GByte *pabyData =
3524 1187 : static_cast<GByte *>(const_cast<void *>(array->buffers[2]));
3525 1187 : size_t j = iStart;
3526 1187 : OffsetType nCurOffset = panOffsets[iStart];
3527 5103 : for (size_t i = 0; i < nLength; ++i)
3528 : {
3529 3916 : if (abyValidityFromFilters[i])
3530 : {
3531 1768 : const auto nStartOffset = panOffsets[i + iStart];
3532 1768 : const auto nEndOffset = panOffsets[i + iStart + 1];
3533 1768 : panOffsets[j] = nCurOffset;
3534 1768 : const auto nSize = static_cast<size_t>(nEndOffset - nStartOffset);
3535 1768 : if (nSize)
3536 : {
3537 1562 : if (nCurOffset < nStartOffset)
3538 : {
3539 636 : memmove(pabyData + nCurOffset, pabyData + nStartOffset,
3540 : nSize);
3541 : }
3542 1562 : nCurOffset += static_cast<OffsetType>(nSize);
3543 : }
3544 1768 : ++j;
3545 : }
3546 : }
3547 1187 : panOffsets[j] = nCurOffset;
3548 :
3549 1187 : if (schema->flags & ARROW_FLAG_NULLABLE)
3550 806 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3551 : nNewLength);
3552 :
3553 1187 : array->length = nNewLength;
3554 1187 : }
3555 :
3556 : /************************************************************************/
3557 : /* CompactStringViewArray() */
3558 : /************************************************************************/
3559 :
3560 0 : static void CompactStringViewArray(
3561 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3562 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3563 : {
3564 0 : CPLAssert(array->n_children == 0);
3565 0 : CPLAssert(array->n_buffers >= 2);
3566 0 : const size_t nLength = abyValidityFromFilters.size();
3567 0 : CPLAssert(static_cast<size_t>(array->length) >= iStart + nLength);
3568 :
3569 : // We only compact the string view buffer, not the string content buffers.
3570 0 : const size_t nOffset = static_cast<size_t>(array->offset);
3571 : // Cf https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout
3572 0 : uint32_t *panStringView =
3573 0 : static_cast<uint32_t *>(const_cast<void *>(array->buffers[1])) +
3574 : nOffset * N_VALUES_PER_STRING_VIEW;
3575 0 : for (size_t i = 0, j = 0; i < nLength; ++i)
3576 : {
3577 0 : if (abyValidityFromFilters[i])
3578 : {
3579 0 : if (j < i)
3580 : {
3581 0 : memmove(panStringView + (j + iStart) * N_VALUES_PER_STRING_VIEW,
3582 0 : panStringView + (i + iStart) * N_VALUES_PER_STRING_VIEW,
3583 : sizeof(panStringView[0]) * N_VALUES_PER_STRING_VIEW);
3584 : }
3585 0 : ++j;
3586 : }
3587 : }
3588 :
3589 0 : if (schema->flags & ARROW_FLAG_NULLABLE)
3590 0 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3591 : nNewLength);
3592 :
3593 0 : array->length = nNewLength;
3594 0 : }
3595 :
3596 : /************************************************************************/
3597 : /* CompactFixedWidthArray() */
3598 : /************************************************************************/
3599 :
3600 : static void
3601 305 : CompactFixedWidthArray(const struct ArrowSchema *schema,
3602 : struct ArrowArray *array, int nWidth, size_t iStart,
3603 : const std::vector<bool> &abyValidityFromFilters,
3604 : size_t nNewLength)
3605 : {
3606 305 : CPLAssert(array->n_children == 0);
3607 305 : CPLAssert(array->n_buffers == 2);
3608 305 : CPLAssert(static_cast<size_t>(array->length) >=
3609 : iStart + abyValidityFromFilters.size());
3610 :
3611 305 : const size_t nLength = abyValidityFromFilters.size();
3612 305 : const size_t nOffset = static_cast<size_t>(array->offset);
3613 305 : GByte *pabyData =
3614 305 : static_cast<GByte *>(const_cast<void *>(array->buffers[1]));
3615 305 : size_t nStartOffset = (iStart + nOffset) * nWidth;
3616 305 : size_t nCurOffset = nStartOffset;
3617 1133 : for (size_t i = 0; i < nLength; ++i, nStartOffset += nWidth)
3618 : {
3619 828 : if (abyValidityFromFilters[i])
3620 : {
3621 391 : if (nCurOffset < nStartOffset)
3622 : {
3623 210 : memcpy(pabyData + nCurOffset, pabyData + nStartOffset, nWidth);
3624 : }
3625 391 : nCurOffset += nWidth;
3626 : }
3627 : }
3628 :
3629 305 : if (schema->flags & ARROW_FLAG_NULLABLE)
3630 305 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3631 : nNewLength);
3632 :
3633 305 : array->length = nNewLength;
3634 305 : }
3635 :
3636 : /************************************************************************/
3637 : /* CompactStructArray() */
3638 : /************************************************************************/
3639 :
3640 : static bool CompactArray(const struct ArrowSchema *schema,
3641 : struct ArrowArray *array, size_t iStart,
3642 : const std::vector<bool> &abyValidityFromFilters,
3643 : size_t nNewLength);
3644 :
3645 665 : static bool CompactStructArray(const struct ArrowSchema *schema,
3646 : struct ArrowArray *array, size_t iStart,
3647 : const std::vector<bool> &abyValidityFromFilters,
3648 : size_t nNewLength)
3649 : {
3650 : // The equality might not be strict in the case of when some sub-arrays
3651 : // are fully void !
3652 665 : CPLAssert(array->n_children <= schema->n_children);
3653 6718 : for (int64_t iField = 0; iField < array->n_children; ++iField)
3654 : {
3655 6053 : const auto psChildSchema = schema->children[iField];
3656 6053 : const auto psChildArray = array->children[iField];
3657 : // To please Arrow validation...
3658 6053 : const size_t nChildNewLength =
3659 6053 : static_cast<size_t>(array->offset) + nNewLength;
3660 6053 : if (psChildArray->length > array->length)
3661 : {
3662 120 : std::vector<bool> abyChildValidity(abyValidityFromFilters);
3663 120 : abyChildValidity.resize(
3664 120 : abyValidityFromFilters.size() +
3665 120 : static_cast<size_t>(psChildArray->length - array->length),
3666 : false);
3667 120 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3668 : abyChildValidity, nChildNewLength))
3669 : {
3670 0 : return false;
3671 : }
3672 : }
3673 : else
3674 : {
3675 5933 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3676 : abyValidityFromFilters, nChildNewLength))
3677 : {
3678 0 : return false;
3679 : }
3680 : }
3681 6053 : CPLAssert(psChildArray->length ==
3682 : static_cast<int64_t>(nChildNewLength));
3683 : }
3684 :
3685 665 : if (schema->flags & ARROW_FLAG_NULLABLE)
3686 201 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3687 : nNewLength);
3688 :
3689 665 : array->length = nNewLength;
3690 :
3691 665 : return true;
3692 : }
3693 :
3694 : /************************************************************************/
3695 : /* InvalidateNullCountRec() */
3696 : /************************************************************************/
3697 :
3698 570 : static void InvalidateNullCountRec(const struct ArrowSchema *schema,
3699 : struct ArrowArray *array)
3700 : {
3701 570 : if (schema->flags & ARROW_FLAG_NULLABLE)
3702 210 : array->null_count = -1;
3703 960 : for (int i = 0; i < array->n_children; ++i)
3704 390 : InvalidateNullCountRec(schema->children[i], array->children[i]);
3705 570 : }
3706 :
3707 : /************************************************************************/
3708 : /* CompactListArray() */
3709 : /************************************************************************/
3710 :
3711 : template <class OffsetType>
3712 1773 : static bool CompactListArray(const struct ArrowSchema *schema,
3713 : struct ArrowArray *array, size_t iStart,
3714 : const std::vector<bool> &abyValidityFromFilters,
3715 : size_t nNewLength)
3716 : {
3717 1773 : CPLAssert(static_cast<size_t>(array->length) >=
3718 : iStart + abyValidityFromFilters.size());
3719 1773 : CPLAssert(array->n_children == 1);
3720 1773 : CPLAssert(array->n_buffers == 2);
3721 :
3722 1773 : const auto psChildSchema = schema->children[0];
3723 1773 : const auto psChildArray = array->children[0];
3724 :
3725 1773 : const size_t nLength = abyValidityFromFilters.size();
3726 1773 : const size_t nOffset = static_cast<size_t>(array->offset);
3727 1773 : OffsetType *panOffsets =
3728 1773 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3729 : nOffset;
3730 :
3731 1773 : if (panOffsets[iStart + nLength] > panOffsets[iStart])
3732 : {
3733 3186 : std::vector<bool> abyChildValidity(
3734 1593 : static_cast<size_t>(panOffsets[iStart + nLength] -
3735 1593 : panOffsets[iStart]),
3736 : true);
3737 1593 : size_t j = iStart;
3738 1593 : OffsetType nCurOffset = panOffsets[iStart];
3739 6694 : for (size_t i = 0; i < nLength; ++i)
3740 : {
3741 5101 : if (abyValidityFromFilters[i])
3742 : {
3743 2142 : const auto nSize =
3744 2142 : panOffsets[i + iStart + 1] - panOffsets[i + iStart];
3745 2142 : panOffsets[j] = nCurOffset;
3746 2142 : nCurOffset += nSize;
3747 2142 : ++j;
3748 : }
3749 : else
3750 : {
3751 2959 : const auto nStartOffset = panOffsets[i + iStart];
3752 2959 : const auto nEndOffset = panOffsets[i + iStart + 1];
3753 2959 : if (nStartOffset != nEndOffset)
3754 : {
3755 3073 : if (nStartOffset >=
3756 1538 : panOffsets[iStart] + abyChildValidity.size())
3757 : {
3758 : // shouldn't happen in sane arrays...
3759 0 : CPLError(CE_Failure, CPLE_AppDefined,
3760 : "nStartOffset >= panOffsets[iStart] + "
3761 : "abyChildValidity.size()");
3762 0 : return false;
3763 : }
3764 : // nEndOffset might be equal to abyChildValidity.size()
3765 3073 : if (nEndOffset >
3766 1538 : panOffsets[iStart] + abyChildValidity.size())
3767 : {
3768 : // shouldn't happen in sane arrays...
3769 0 : CPLError(CE_Failure, CPLE_AppDefined,
3770 : "nEndOffset > panOffsets[iStart] + "
3771 : "abyChildValidity.size()");
3772 0 : return false;
3773 : }
3774 1538 : for (auto k = nStartOffset - panOffsets[iStart];
3775 4652 : k < nEndOffset - panOffsets[iStart]; ++k)
3776 3114 : abyChildValidity[static_cast<size_t>(k)] = false;
3777 : }
3778 : }
3779 : }
3780 1593 : panOffsets[j] = nCurOffset;
3781 1593 : const size_t nChildNewLength = static_cast<size_t>(panOffsets[j]);
3782 : // To please Arrow validation
3783 4552 : for (; j < iStart + nLength; ++j)
3784 2959 : panOffsets[j] = nCurOffset;
3785 :
3786 1593 : if (!CompactArray(psChildSchema, psChildArray,
3787 1593 : static_cast<size_t>(panOffsets[iStart]),
3788 : abyChildValidity, nChildNewLength))
3789 0 : return false;
3790 :
3791 1593 : CPLAssert(psChildArray->length ==
3792 : static_cast<int64_t>(nChildNewLength));
3793 : }
3794 : else
3795 : {
3796 180 : InvalidateNullCountRec(psChildSchema, psChildArray);
3797 : }
3798 :
3799 1773 : if (schema->flags & ARROW_FLAG_NULLABLE)
3800 1773 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3801 : nNewLength);
3802 :
3803 1773 : array->length = nNewLength;
3804 :
3805 1773 : return true;
3806 : }
3807 :
3808 : /************************************************************************/
3809 : /* CompactFixedSizeListArray() */
3810 : /************************************************************************/
3811 :
3812 : static bool
3813 809 : CompactFixedSizeListArray(const struct ArrowSchema *schema,
3814 : struct ArrowArray *array, size_t N, size_t iStart,
3815 : const std::vector<bool> &abyValidityFromFilters,
3816 : size_t nNewLength)
3817 : {
3818 809 : CPLAssert(static_cast<size_t>(array->length) >=
3819 : iStart + abyValidityFromFilters.size());
3820 809 : CPLAssert(array->n_children == 1);
3821 :
3822 809 : const auto psChildSchema = schema->children[0];
3823 809 : const auto psChildArray = array->children[0];
3824 :
3825 809 : const size_t nLength = abyValidityFromFilters.size();
3826 809 : const size_t nOffset = static_cast<size_t>(array->offset);
3827 1618 : std::vector<bool> abyChildValidity(N * nLength, true);
3828 809 : size_t nChildNewLength = (iStart + nOffset) * N;
3829 809 : size_t nSrcLength = 0;
3830 3198 : for (size_t i = 0; i < nLength; ++i)
3831 : {
3832 2389 : if (abyValidityFromFilters[i])
3833 : {
3834 1015 : nChildNewLength += N;
3835 1015 : nSrcLength++;
3836 : }
3837 : else
3838 : {
3839 1374 : const size_t nStartOffset = i * N;
3840 1374 : const size_t nEndOffset = (i + 1) * N;
3841 4122 : for (size_t k = nStartOffset; k < nEndOffset; ++k)
3842 2748 : abyChildValidity[k] = false;
3843 : }
3844 : }
3845 809 : CPL_IGNORE_RET_VAL(nSrcLength);
3846 809 : CPLAssert(iStart + nSrcLength == nNewLength);
3847 :
3848 809 : if (!CompactArray(psChildSchema, psChildArray, (iStart + nOffset) * N,
3849 : abyChildValidity, nChildNewLength))
3850 0 : return false;
3851 :
3852 809 : if (schema->flags & ARROW_FLAG_NULLABLE)
3853 809 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3854 : nNewLength);
3855 :
3856 809 : array->length = nNewLength;
3857 :
3858 809 : CPLAssert(psChildArray->length >=
3859 : static_cast<int64_t>(N) * (array->length + array->offset));
3860 :
3861 809 : return true;
3862 : }
3863 :
3864 : /************************************************************************/
3865 : /* CompactMapArray() */
3866 : /************************************************************************/
3867 :
3868 561 : static bool CompactMapArray(const struct ArrowSchema *schema,
3869 : struct ArrowArray *array, size_t iStart,
3870 : const std::vector<bool> &abyValidityFromFilters,
3871 : size_t nNewLength)
3872 : {
3873 561 : return CompactListArray<uint32_t>(schema, array, iStart,
3874 561 : abyValidityFromFilters, nNewLength);
3875 : }
3876 :
3877 : /************************************************************************/
3878 : /* CompactArray() */
3879 : /************************************************************************/
3880 :
3881 8455 : static bool CompactArray(const struct ArrowSchema *schema,
3882 : struct ArrowArray *array, size_t iStart,
3883 : const std::vector<bool> &abyValidityFromFilters,
3884 : size_t nNewLength)
3885 : {
3886 8455 : const char *format = schema->format;
3887 :
3888 8455 : if (IsStructure(format))
3889 : {
3890 582 : if (!CompactStructArray(schema, array, iStart, abyValidityFromFilters,
3891 : nNewLength))
3892 0 : return false;
3893 : }
3894 7873 : else if (IsList(format))
3895 : {
3896 1209 : if (!CompactListArray<uint32_t>(schema, array, iStart,
3897 : abyValidityFromFilters, nNewLength))
3898 0 : return false;
3899 : }
3900 6664 : else if (IsLargeList(format))
3901 : {
3902 3 : if (!CompactListArray<uint64_t>(schema, array, iStart,
3903 : abyValidityFromFilters, nNewLength))
3904 0 : return false;
3905 : }
3906 6661 : else if (IsMap(format))
3907 : {
3908 561 : if (!CompactMapArray(schema, array, iStart, abyValidityFromFilters,
3909 : nNewLength))
3910 0 : return false;
3911 : }
3912 6100 : else if (IsFixedSizeList(format))
3913 : {
3914 809 : const int N = GetFixedSizeList(format);
3915 809 : if (N <= 0)
3916 0 : return false;
3917 809 : if (!CompactFixedSizeListArray(schema, array, static_cast<size_t>(N),
3918 : iStart, abyValidityFromFilters,
3919 : nNewLength))
3920 0 : return false;
3921 : }
3922 5291 : else if (IsBoolean(format))
3923 : {
3924 224 : CompactBoolArray(schema, array, iStart, abyValidityFromFilters,
3925 : nNewLength);
3926 : }
3927 5067 : else if (IsInt8(format) || IsUInt8(format))
3928 : {
3929 444 : CompactPrimitiveArray<uint8_t>(schema, array, iStart,
3930 : abyValidityFromFilters, nNewLength);
3931 : }
3932 4623 : else if (IsInt16(format) || IsUInt16(format) || IsFloat16(format))
3933 : {
3934 458 : CompactPrimitiveArray<uint16_t>(schema, array, iStart,
3935 : abyValidityFromFilters, nNewLength);
3936 : }
3937 8037 : else if (IsInt32(format) || IsUInt32(format) || IsFloat32(format) ||
3938 11539 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
3939 3502 : strcmp(format, "ttm") == 0)
3940 : {
3941 794 : CompactPrimitiveArray<uint32_t>(schema, array, iStart,
3942 : abyValidityFromFilters, nNewLength);
3943 : }
3944 6023 : else if (IsInt64(format) || IsUInt64(format) || IsFloat64(format) ||
3945 1997 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
3946 6023 : strcmp(format, "ttn") == 0 || strncmp(format, "ts", 2) == 0)
3947 : {
3948 1879 : CompactPrimitiveArray<uint64_t>(schema, array, iStart,
3949 : abyValidityFromFilters, nNewLength);
3950 : }
3951 1492 : else if (IsString(format) || IsBinary(format))
3952 : {
3953 983 : CompactStringOrBinaryArray<uint32_t>(
3954 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3955 : }
3956 509 : else if (IsLargeString(format) || IsLargeBinary(format))
3957 : {
3958 204 : CompactStringOrBinaryArray<uint64_t>(
3959 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3960 : }
3961 305 : else if (IsStringView(format))
3962 : {
3963 0 : CompactStringViewArray(schema, array, iStart, abyValidityFromFilters,
3964 : nNewLength);
3965 : }
3966 305 : else if (IsFixedWidthBinary(format))
3967 : {
3968 67 : const int nWidth = GetFixedWithBinary(format);
3969 67 : CompactFixedWidthArray(schema, array, nWidth, iStart,
3970 : abyValidityFromFilters, nNewLength);
3971 : }
3972 238 : else if (IsDecimal(format))
3973 : {
3974 238 : int nPrecision = 0;
3975 238 : int nScale = 0;
3976 238 : int nWidthInBytes = 0;
3977 238 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
3978 : {
3979 0 : CPLError(CE_Failure, CPLE_AppDefined,
3980 : "Unexpected error in PostFilterArrowArray(): unhandled "
3981 : "field format: %s",
3982 : format);
3983 :
3984 0 : return false;
3985 : }
3986 238 : CompactFixedWidthArray(schema, array, nWidthInBytes, iStart,
3987 : abyValidityFromFilters, nNewLength);
3988 : }
3989 : else
3990 : {
3991 0 : CPLError(CE_Failure, CPLE_AppDefined,
3992 : "Unexpected error in CompactArray(): unhandled "
3993 : "field format: %s",
3994 : format);
3995 0 : return false;
3996 : }
3997 :
3998 8455 : return true;
3999 : }
4000 :
4001 : /************************************************************************/
4002 : /* FillValidityArrayFromWKBArray() */
4003 : /************************************************************************/
4004 :
4005 : template <class OffsetType>
4006 : static size_t
4007 21 : FillValidityArrayFromWKBArray(struct ArrowArray *array, const OGRLayer *poLayer,
4008 : std::vector<bool> &abyValidityFromFilters)
4009 : {
4010 21 : const size_t nLength = static_cast<size_t>(array->length);
4011 14 : const uint8_t *pabyValidity =
4012 21 : array->null_count == 0
4013 : ? nullptr
4014 7 : : static_cast<const uint8_t *>(array->buffers[0]);
4015 21 : const size_t nOffset = static_cast<size_t>(array->offset);
4016 21 : const OffsetType *panOffsets =
4017 21 : static_cast<const OffsetType *>(array->buffers[1]) + nOffset;
4018 21 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
4019 21 : OGREnvelope sEnvelope;
4020 21 : abyValidityFromFilters.resize(nLength);
4021 21 : size_t nCountIntersecting = 0;
4022 138 : for (size_t i = 0; i < nLength; ++i)
4023 : {
4024 117 : if (!pabyValidity || TestBit(pabyValidity, i + nOffset))
4025 : {
4026 110 : const GByte *pabyWKB = pabyData + panOffsets[i];
4027 110 : const size_t nWKBSize =
4028 110 : static_cast<size_t>(panOffsets[i + 1] - panOffsets[i]);
4029 110 : if (poLayer->FilterWKBGeometry(pabyWKB, nWKBSize,
4030 : /* bEnvelopeAlreadySet=*/false,
4031 : sEnvelope))
4032 : {
4033 29 : abyValidityFromFilters[i] = true;
4034 29 : nCountIntersecting++;
4035 : }
4036 : }
4037 : }
4038 21 : return nCountIntersecting;
4039 : }
4040 :
4041 : /************************************************************************/
4042 : /* ArrowTimestampToOGRDateTime() */
4043 : /************************************************************************/
4044 :
4045 107 : static void ArrowTimestampToOGRDateTime(int64_t nTimestamp,
4046 : int nInvFactorToSecond,
4047 : const char *pszTZ, OGRFeature &oFeature,
4048 : int iField)
4049 : {
4050 107 : double floatingPart = 0;
4051 107 : if (nInvFactorToSecond)
4052 : {
4053 107 : floatingPart =
4054 107 : (nTimestamp % nInvFactorToSecond) / double(nInvFactorToSecond);
4055 107 : nTimestamp /= nInvFactorToSecond;
4056 : }
4057 107 : int nTZFlag = 0;
4058 107 : const size_t nTZLen = strlen(pszTZ);
4059 107 : if ((nTZLen == 3 && strcmp(pszTZ, "UTC") == 0) ||
4060 0 : (nTZLen == 7 && strcmp(pszTZ, "Etc/UTC") == 0))
4061 : {
4062 17 : nTZFlag = 100;
4063 : }
4064 90 : else if (nTZLen == 6 && (pszTZ[0] == '+' || pszTZ[0] == '-') &&
4065 33 : pszTZ[3] == ':')
4066 : {
4067 33 : int nTZHour = atoi(pszTZ + 1);
4068 33 : int nTZMin = atoi(pszTZ + 4);
4069 33 : if (nTZHour >= 0 && nTZHour <= 14 && nTZMin >= 0 && nTZMin < 60 &&
4070 33 : (nTZMin % 15) == 0)
4071 : {
4072 33 : nTZFlag = (nTZHour * 4) + (nTZMin / 15);
4073 33 : if (pszTZ[0] == '+')
4074 : {
4075 24 : nTZFlag = 100 + nTZFlag;
4076 24 : nTimestamp += nTZHour * 3600 + nTZMin * 60;
4077 : }
4078 : else
4079 : {
4080 9 : nTZFlag = 100 - nTZFlag;
4081 9 : nTimestamp -= nTZHour * 3600 + nTZMin * 60;
4082 : }
4083 : }
4084 : }
4085 : struct tm dt;
4086 107 : CPLUnixTimeToYMDHMS(nTimestamp, &dt);
4087 107 : oFeature.SetField(iField, dt.tm_year + 1900, dt.tm_mon + 1, dt.tm_mday,
4088 : dt.tm_hour, dt.tm_min,
4089 107 : static_cast<float>(dt.tm_sec + floatingPart), nTZFlag);
4090 107 : }
4091 :
4092 : /************************************************************************/
4093 : /* BuildMapFieldNameToArrowPath() */
4094 : /************************************************************************/
4095 :
4096 : static void
4097 334 : BuildMapFieldNameToArrowPath(const struct ArrowSchema *schema,
4098 : std::map<std::string, std::vector<int>> &oMap,
4099 : const std::string &osPrefix,
4100 : std::vector<int> &anArrowPath)
4101 : {
4102 7833 : for (int64_t i = 0; i < schema->n_children; ++i)
4103 : {
4104 7499 : auto psChild = schema->children[i];
4105 7499 : anArrowPath.push_back(static_cast<int>(i));
4106 7499 : if (IsStructure(psChild->format))
4107 : {
4108 400 : std::string osNewPrefix(osPrefix);
4109 200 : osNewPrefix += psChild->name;
4110 200 : osNewPrefix += ".";
4111 200 : BuildMapFieldNameToArrowPath(psChild, oMap, osNewPrefix,
4112 : anArrowPath);
4113 : }
4114 : else
4115 : {
4116 7299 : oMap[osPrefix + psChild->name] = anArrowPath;
4117 : }
4118 7499 : anArrowPath.pop_back();
4119 : }
4120 334 : }
4121 :
4122 : /************************************************************************/
4123 : /* FillFieldList() */
4124 : /************************************************************************/
4125 :
4126 : template <typename ListOffsetType, typename ArrowType,
4127 : typename OGRType = ArrowType>
4128 167 : inline static void FillFieldList(const struct ArrowArray *array,
4129 : int iOGRFieldIdx, size_t nOffsettedIndex,
4130 : const struct ArrowArray *childArray,
4131 : OGRFeature &oFeature)
4132 : {
4133 167 : const auto panOffsets =
4134 167 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4135 : nOffsettedIndex;
4136 334 : std::vector<OGRType> aValues;
4137 167 : const auto *paValues =
4138 167 : static_cast<const ArrowType *>(childArray->buffers[1]);
4139 167 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4140 509 : i < static_cast<size_t>(panOffsets[1]); ++i)
4141 : {
4142 342 : aValues.push_back(static_cast<OGRType>(paValues[i]));
4143 : }
4144 167 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4145 : aValues.data());
4146 167 : }
4147 :
4148 : /************************************************************************/
4149 : /* FillFieldListFromBool() */
4150 : /************************************************************************/
4151 :
4152 : template <typename ListOffsetType>
4153 : inline static void
4154 16 : FillFieldListFromBool(const struct ArrowArray *array, int iOGRFieldIdx,
4155 : size_t nOffsettedIndex,
4156 : const struct ArrowArray *childArray, OGRFeature &oFeature)
4157 : {
4158 16 : const auto panOffsets =
4159 16 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4160 : nOffsettedIndex;
4161 32 : std::vector<int> aValues;
4162 16 : const auto *paValues = static_cast<const uint8_t *>(childArray->buffers[1]);
4163 16 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4164 48 : i < static_cast<size_t>(panOffsets[1]); ++i)
4165 : {
4166 32 : aValues.push_back(TestBit(paValues, i) ? 1 : 0);
4167 : }
4168 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4169 16 : aValues.data());
4170 16 : }
4171 :
4172 : /************************************************************************/
4173 : /* FillFieldListFromHalfFloat() */
4174 : /************************************************************************/
4175 :
4176 : template <typename ListOffsetType>
4177 8 : inline static void FillFieldListFromHalfFloat(
4178 : const struct ArrowArray *array, int iOGRFieldIdx, size_t nOffsettedIndex,
4179 : const struct ArrowArray *childArray, OGRFeature &oFeature)
4180 : {
4181 8 : const auto panOffsets =
4182 8 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4183 : nOffsettedIndex;
4184 16 : std::vector<double> aValues;
4185 8 : const auto *phfValues =
4186 8 : static_cast<const GFloat16 *>(childArray->buffers[1]);
4187 8 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4188 24 : i < static_cast<size_t>(panOffsets[1]); ++i)
4189 : {
4190 16 : aValues.push_back(static_cast<double>(phfValues[i]));
4191 : }
4192 8 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4193 8 : aValues.data());
4194 8 : }
4195 :
4196 : /************************************************************************/
4197 : /* FillFieldListFromString() */
4198 : /************************************************************************/
4199 :
4200 : template <typename ListOffsetType, typename StringOffsetType>
4201 32 : inline static void FillFieldListFromString(const struct ArrowArray *array,
4202 : int iOGRFieldIdx,
4203 : size_t nOffsettedIndex,
4204 : const struct ArrowArray *childArray,
4205 : OGRFeature &oFeature)
4206 : {
4207 32 : const auto panOffsets =
4208 32 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4209 : nOffsettedIndex;
4210 64 : CPLStringList aosVals;
4211 90 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4212 90 : i < static_cast<size_t>(panOffsets[1]); ++i)
4213 : {
4214 58 : aosVals.push_back(
4215 : GetStringAsStringView<StringOffsetType>(childArray, i));
4216 : }
4217 32 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
4218 32 : }
4219 :
4220 : /************************************************************************/
4221 : /* FillFieldListFromStringView() */
4222 : /************************************************************************/
4223 :
4224 : template <typename ListOffsetType>
4225 0 : inline static void FillFieldListFromStringView(
4226 : const struct ArrowArray *array, int iOGRFieldIdx, size_t nOffsettedIndex,
4227 : const struct ArrowArray *childArray, OGRFeature &oFeature)
4228 : {
4229 0 : const auto panOffsets =
4230 0 : static_cast<const ListOffsetType *>(array->buffers[1]) +
4231 : nOffsettedIndex;
4232 0 : CPLStringList aosVals;
4233 0 : for (size_t i = static_cast<size_t>(panOffsets[0]);
4234 0 : i < static_cast<size_t>(panOffsets[1]); ++i)
4235 : {
4236 0 : aosVals.push_back(GetStringView(childArray, i));
4237 : }
4238 0 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
4239 0 : }
4240 :
4241 : /************************************************************************/
4242 : /* FillFieldFixedSizeList() */
4243 : /************************************************************************/
4244 :
4245 : template <typename ArrowType, typename OGRType = ArrowType>
4246 120 : inline static void FillFieldFixedSizeList(
4247 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
4248 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
4249 : {
4250 240 : std::vector<OGRType> aValues;
4251 120 : const auto *paValues =
4252 120 : static_cast<const ArrowType *>(childArray->buffers[1]) +
4253 120 : childArray->offset + nOffsettedIndex * nItems;
4254 360 : for (int i = 0; i < nItems; ++i)
4255 : {
4256 240 : aValues.push_back(static_cast<OGRType>(paValues[i]));
4257 : }
4258 120 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4259 : aValues.data());
4260 120 : }
4261 :
4262 : /************************************************************************/
4263 : /* FillFieldFixedSizeListString() */
4264 : /************************************************************************/
4265 :
4266 : template <typename StringOffsetType>
4267 17 : inline static void FillFieldFixedSizeListString(
4268 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
4269 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
4270 : {
4271 34 : CPLStringList aosVals;
4272 51 : for (int i = 0; i < nItems; ++i)
4273 : {
4274 34 : aosVals.push_back(GetStringAsStringView<StringOffsetType>(
4275 34 : childArray, nOffsettedIndex * nItems + i));
4276 : }
4277 17 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
4278 17 : }
4279 :
4280 : /************************************************************************/
4281 : /* FillFieldFixedSizeListStringView() */
4282 : /************************************************************************/
4283 :
4284 0 : inline static void FillFieldFixedSizeListStringView(
4285 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
4286 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
4287 : {
4288 0 : CPLStringList aosVals;
4289 0 : for (int i = 0; i < nItems; ++i)
4290 : {
4291 0 : aosVals.push_back(
4292 0 : GetStringView(childArray, nOffsettedIndex * nItems + i));
4293 : }
4294 0 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
4295 0 : }
4296 :
4297 : /************************************************************************/
4298 : /* GetValue() */
4299 : /************************************************************************/
4300 :
4301 : template <typename ArrowType>
4302 245 : inline static ArrowType GetValue(const struct ArrowArray *array,
4303 : size_t iFeature)
4304 : {
4305 245 : const auto *panValues = static_cast<const ArrowType *>(array->buffers[1]);
4306 245 : return panValues[iFeature + array->offset];
4307 : }
4308 :
4309 12 : template <> bool GetValue<bool>(const struct ArrowArray *array, size_t iFeature)
4310 : {
4311 12 : const auto *pabyValues = static_cast<const uint8_t *>(array->buffers[1]);
4312 12 : return TestBit(pabyValues, iFeature + static_cast<size_t>(array->offset));
4313 : }
4314 :
4315 : /************************************************************************/
4316 : /* GetValueFloat16() */
4317 : /************************************************************************/
4318 :
4319 23 : static float GetValueFloat16(const struct ArrowArray *array, const size_t nIdx)
4320 : {
4321 23 : const auto *panValues = static_cast<const uint16_t *>(array->buffers[1]);
4322 : const auto nFloat16AsUInt32 =
4323 23 : CPLHalfToFloat(panValues[nIdx + array->offset]);
4324 : float f;
4325 23 : memcpy(&f, &nFloat16AsUInt32, sizeof(f));
4326 23 : return f;
4327 : }
4328 :
4329 : /************************************************************************/
4330 : /* GetValueDecimal() */
4331 : /************************************************************************/
4332 :
4333 71 : static double GetValueDecimal(const struct ArrowArray *array,
4334 : const int nWidthIn64BitWord, const int nScale,
4335 : const size_t nIdx)
4336 : {
4337 : #ifdef CPL_LSB
4338 71 : const auto nIdxIn64BitWord = nIdx * nWidthIn64BitWord;
4339 : #else
4340 : const auto nIdxIn64BitWord =
4341 : nIdx * nWidthIn64BitWord + nWidthIn64BitWord - 1;
4342 : #endif
4343 71 : const auto *panValues = static_cast<const int64_t *>(array->buffers[1]);
4344 71 : const auto nVal =
4345 71 : panValues[nIdxIn64BitWord + array->offset * nWidthIn64BitWord];
4346 71 : return static_cast<double>(nVal) * std::pow(10.0, -nScale);
4347 : }
4348 :
4349 : /************************************************************************/
4350 : /* GetBinaryAsBase64() */
4351 : /************************************************************************/
4352 :
4353 : template <class OffsetType>
4354 8 : static std::string GetBinaryAsBase64(const struct ArrowArray *array,
4355 : const size_t nIdx)
4356 : {
4357 8 : const OffsetType *panOffsets =
4358 8 : static_cast<const OffsetType *>(array->buffers[1]) +
4359 8 : static_cast<size_t>(array->offset) + nIdx;
4360 8 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
4361 8 : const size_t nLen = static_cast<size_t>(panOffsets[1] - panOffsets[0]);
4362 8 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
4363 : {
4364 0 : CPLError(CE_Failure, CPLE_AppDefined, "Too large binary");
4365 0 : return std::string();
4366 : }
4367 16 : char *pszVal = CPLBase64Encode(
4368 8 : static_cast<int>(nLen), pabyData + static_cast<size_t>(panOffsets[0]));
4369 16 : std::string osStr(pszVal);
4370 8 : CPLFree(pszVal);
4371 8 : return osStr;
4372 : }
4373 :
4374 : /************************************************************************/
4375 : /* GetValueFixedWithBinaryAsBase64() */
4376 : /************************************************************************/
4377 :
4378 : static std::string
4379 4 : GetValueFixedWithBinaryAsBase64(const struct ArrowArray *array,
4380 : const int nWidth, const size_t nIdx)
4381 : {
4382 4 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[1]);
4383 8 : char *pszVal = CPLBase64Encode(
4384 : nWidth,
4385 4 : pabyData + (static_cast<size_t>(array->offset) + nIdx) * nWidth);
4386 4 : std::string osStr(pszVal);
4387 4 : CPLFree(pszVal);
4388 4 : return osStr;
4389 : }
4390 :
4391 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4392 : const struct ArrowArray *array,
4393 : const size_t nIdx);
4394 :
4395 : /************************************************************************/
4396 : /* AddToArray() */
4397 : /************************************************************************/
4398 :
4399 142 : static void AddToArray(CPLJSONArray &oArray, const struct ArrowSchema *schema,
4400 : const struct ArrowArray *array, const size_t nIdx)
4401 : {
4402 142 : if (IsBoolean(schema->format))
4403 7 : oArray.Add(GetValue<bool>(array, nIdx));
4404 135 : else if (IsUInt8(schema->format))
4405 13 : oArray.Add(GetValue<uint8_t>(array, nIdx));
4406 122 : else if (IsInt8(schema->format))
4407 7 : oArray.Add(GetValue<int8_t>(array, nIdx));
4408 115 : else if (IsUInt16(schema->format))
4409 7 : oArray.Add(GetValue<uint16_t>(array, nIdx));
4410 108 : else if (IsInt16(schema->format))
4411 7 : oArray.Add(GetValue<int16_t>(array, nIdx));
4412 101 : else if (IsUInt32(schema->format))
4413 7 : oArray.Add(static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
4414 94 : else if (IsInt32(schema->format))
4415 7 : oArray.Add(GetValue<int32_t>(array, nIdx));
4416 87 : else if (IsUInt64(schema->format))
4417 7 : oArray.Add(GetValue<uint64_t>(array, nIdx));
4418 80 : else if (IsInt64(schema->format))
4419 7 : oArray.Add(static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
4420 73 : else if (IsFloat16(schema->format))
4421 7 : oArray.Add(static_cast<double>(GetValueFloat16(array, nIdx)));
4422 66 : else if (IsFloat32(schema->format))
4423 7 : oArray.Add(static_cast<double>(GetValue<float>(array, nIdx)));
4424 59 : else if (IsFloat64(schema->format))
4425 7 : oArray.Add(GetValue<double>(array, nIdx));
4426 52 : else if (IsString(schema->format))
4427 13 : oArray.Add(GetStringAsStringView<uint32_t>(array, nIdx));
4428 39 : else if (IsLargeString(schema->format))
4429 4 : oArray.Add(GetStringAsStringView<uint64_t>(array, nIdx));
4430 35 : else if (IsStringView(schema->format))
4431 0 : oArray.Add(GetStringView(array, nIdx));
4432 35 : else if (IsBinary(schema->format))
4433 2 : oArray.Add(GetBinaryAsBase64<uint32_t>(array, nIdx));
4434 33 : else if (IsLargeBinary(schema->format))
4435 2 : oArray.Add(GetBinaryAsBase64<uint64_t>(array, nIdx));
4436 31 : else if (IsFixedWidthBinary(schema->format))
4437 2 : oArray.Add(GetValueFixedWithBinaryAsBase64(
4438 2 : array, GetFixedWithBinary(schema->format), nIdx));
4439 29 : else if (IsDecimal(schema->format))
4440 : {
4441 7 : int nPrecision = 0;
4442 7 : int nScale = 0;
4443 7 : int nWidthInBytes = 0;
4444 7 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
4445 7 : nWidthInBytes);
4446 : // Already validated
4447 7 : CPLAssert(bOK);
4448 7 : CPL_IGNORE_RET_VAL(bOK);
4449 7 : oArray.Add(GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
4450 : }
4451 : else
4452 22 : oArray.Add(GetObjectAsJSON(schema, array, nIdx));
4453 142 : }
4454 :
4455 : /************************************************************************/
4456 : /* GetListAsJSON() */
4457 : /************************************************************************/
4458 :
4459 : template <class OffsetType>
4460 112 : static CPLJSONArray GetListAsJSON(const struct ArrowSchema *schema,
4461 : const struct ArrowArray *array,
4462 : const size_t nIdx)
4463 : {
4464 112 : CPLJSONArray oArray;
4465 112 : const auto panOffsets = static_cast<const OffsetType *>(array->buffers[1]) +
4466 112 : array->offset + nIdx;
4467 112 : const auto childSchema = schema->children[0];
4468 112 : const auto childArray = array->children[0];
4469 7 : const uint8_t *pabyValidity =
4470 112 : childArray->null_count == 0
4471 : ? nullptr
4472 105 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4473 278 : for (size_t k = static_cast<size_t>(panOffsets[0]);
4474 278 : k < static_cast<size_t>(panOffsets[1]); k++)
4475 : {
4476 318 : if (!pabyValidity ||
4477 152 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4478 : {
4479 136 : AddToArray(oArray, childSchema, childArray, k);
4480 : }
4481 : else
4482 : {
4483 30 : oArray.AddNull();
4484 : }
4485 : }
4486 112 : return oArray;
4487 : }
4488 :
4489 : /************************************************************************/
4490 : /* GetFixedSizeListAsJSON() */
4491 : /************************************************************************/
4492 :
4493 3 : static CPLJSONArray GetFixedSizeListAsJSON(const struct ArrowSchema *schema,
4494 : const struct ArrowArray *array,
4495 : const size_t nIdx)
4496 : {
4497 3 : CPLJSONArray oArray;
4498 3 : const int nVals = GetFixedSizeList(schema->format);
4499 3 : const auto childSchema = schema->children[0];
4500 3 : const auto childArray = array->children[0];
4501 3 : const uint8_t *pabyValidity =
4502 3 : childArray->null_count == 0
4503 3 : ? nullptr
4504 3 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4505 9 : for (size_t k = nIdx * nVals; k < (nIdx + 1) * nVals; k++)
4506 : {
4507 12 : if (!pabyValidity ||
4508 6 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4509 : {
4510 6 : AddToArray(oArray, childSchema, childArray, k);
4511 : }
4512 : else
4513 : {
4514 0 : oArray.AddNull();
4515 : }
4516 : }
4517 3 : return oArray;
4518 : }
4519 :
4520 : /************************************************************************/
4521 : /* AddToDict() */
4522 : /************************************************************************/
4523 :
4524 198 : static void AddToDict(CPLJSONObject &oDict, const std::string &osKey,
4525 : const struct ArrowSchema *schema,
4526 : const struct ArrowArray *array, const size_t nIdx)
4527 : {
4528 198 : if (IsBoolean(schema->format))
4529 5 : oDict.Add(osKey, GetValue<bool>(array, nIdx));
4530 193 : else if (IsUInt8(schema->format))
4531 5 : oDict.Add(osKey, GetValue<uint8_t>(array, nIdx));
4532 188 : else if (IsInt8(schema->format))
4533 5 : oDict.Add(osKey, GetValue<int8_t>(array, nIdx));
4534 183 : else if (IsUInt16(schema->format))
4535 5 : oDict.Add(osKey, GetValue<uint16_t>(array, nIdx));
4536 178 : else if (IsInt16(schema->format))
4537 5 : oDict.Add(osKey, GetValue<int16_t>(array, nIdx));
4538 173 : else if (IsUInt32(schema->format))
4539 2 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
4540 171 : else if (IsInt32(schema->format))
4541 6 : oDict.Add(osKey, GetValue<int32_t>(array, nIdx));
4542 165 : else if (IsUInt64(schema->format))
4543 5 : oDict.Add(osKey, GetValue<uint64_t>(array, nIdx));
4544 160 : else if (IsInt64(schema->format))
4545 22 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
4546 138 : else if (IsFloat16(schema->format))
4547 2 : oDict.Add(osKey, static_cast<double>(GetValueFloat16(array, nIdx)));
4548 136 : else if (IsFloat32(schema->format))
4549 5 : oDict.Add(osKey, static_cast<double>(GetValue<float>(array, nIdx)));
4550 131 : else if (IsFloat64(schema->format))
4551 19 : oDict.Add(osKey, GetValue<double>(array, nIdx));
4552 112 : else if (IsString(schema->format))
4553 14 : oDict.Add(osKey, GetStringAsStringView<uint32_t>(array, nIdx));
4554 98 : else if (IsLargeString(schema->format))
4555 2 : oDict.Add(osKey, GetStringAsStringView<uint64_t>(array, nIdx));
4556 96 : else if (IsStringView(schema->format))
4557 0 : oDict.Add(osKey, GetStringView(array, nIdx));
4558 96 : else if (IsBinary(schema->format))
4559 2 : oDict.Add(osKey, GetBinaryAsBase64<uint32_t>(array, nIdx));
4560 94 : else if (IsLargeBinary(schema->format))
4561 2 : oDict.Add(osKey, GetBinaryAsBase64<uint64_t>(array, nIdx));
4562 92 : else if (IsFixedWidthBinary(schema->format))
4563 2 : oDict.Add(osKey, GetValueFixedWithBinaryAsBase64(
4564 2 : array, GetFixedWithBinary(schema->format), nIdx));
4565 90 : else if (IsDecimal(schema->format))
4566 : {
4567 8 : int nPrecision = 0;
4568 8 : int nScale = 0;
4569 8 : int nWidthInBytes = 0;
4570 8 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
4571 8 : nWidthInBytes);
4572 : // Already validated
4573 8 : CPLAssert(bOK);
4574 8 : CPL_IGNORE_RET_VAL(bOK);
4575 8 : oDict.Add(osKey,
4576 : GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
4577 : }
4578 : else
4579 82 : oDict.Add(osKey, GetObjectAsJSON(schema, array, nIdx));
4580 198 : }
4581 :
4582 : /************************************************************************/
4583 : /* GetMapAsJSON() */
4584 : /************************************************************************/
4585 :
4586 243 : static CPLJSONObject GetMapAsJSON(const struct ArrowSchema *schema,
4587 : const struct ArrowArray *array,
4588 : const size_t nIdx)
4589 : {
4590 243 : const auto schemaStruct = schema->children[0];
4591 243 : if (!IsStructure(schemaStruct->format))
4592 : {
4593 0 : CPLError(CE_Failure, CPLE_AppDefined,
4594 : "GetMapAsJSON(): !IsStructure(schemaStruct->format))");
4595 0 : return CPLJSONObject();
4596 : }
4597 243 : const auto schemaKey = schemaStruct->children[0];
4598 243 : const auto schemaValues = schemaStruct->children[1];
4599 243 : if (!IsString(schemaKey->format))
4600 : {
4601 0 : CPLError(CE_Failure, CPLE_AppDefined,
4602 : "GetMapAsJSON(): !IsString(schemaKey->format))");
4603 0 : return CPLJSONObject();
4604 : }
4605 243 : const auto arrayKeys = array->children[0]->children[0];
4606 243 : const auto arrayValues = array->children[0]->children[1];
4607 :
4608 486 : CPLJSONObject oDict;
4609 243 : const auto panOffsets =
4610 243 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset + nIdx;
4611 243 : const uint8_t *pabyValidityKeys =
4612 243 : arrayKeys->null_count == 0
4613 243 : ? nullptr
4614 0 : : static_cast<const uint8_t *>(arrayKeys->buffers[0]);
4615 243 : const uint32_t *panOffsetsKeys =
4616 243 : static_cast<const uint32_t *>(arrayKeys->buffers[1]) +
4617 243 : arrayKeys->offset;
4618 243 : const char *pabyKeys = static_cast<const char *>(arrayKeys->buffers[2]);
4619 243 : const uint8_t *pabyValidityValues =
4620 243 : arrayValues->null_count == 0
4621 243 : ? nullptr
4622 236 : : static_cast<const uint8_t *>(arrayValues->buffers[0]);
4623 463 : for (uint32_t k = panOffsets[0]; k < panOffsets[1]; k++)
4624 : {
4625 220 : if (!pabyValidityKeys ||
4626 0 : TestBit(pabyValidityKeys,
4627 0 : k + static_cast<size_t>(arrayKeys->offset)))
4628 : {
4629 440 : std::string osKey;
4630 220 : osKey.assign(pabyKeys + panOffsetsKeys[k],
4631 220 : panOffsetsKeys[k + 1] - panOffsetsKeys[k]);
4632 :
4633 433 : if (!pabyValidityValues ||
4634 213 : TestBit(pabyValidityValues,
4635 213 : k + static_cast<size_t>(arrayValues->offset)))
4636 : {
4637 168 : AddToDict(oDict, osKey, schemaValues, arrayValues, k);
4638 : }
4639 : else
4640 : {
4641 52 : oDict.AddNull(osKey);
4642 : }
4643 : }
4644 : }
4645 243 : return oDict;
4646 : }
4647 :
4648 : /************************************************************************/
4649 : /* GetStructureAsJSON() */
4650 : /************************************************************************/
4651 :
4652 16 : static CPLJSONObject GetStructureAsJSON(const struct ArrowSchema *schema,
4653 : const struct ArrowArray *array,
4654 : const size_t nIdx)
4655 : {
4656 16 : CPLJSONObject oDict;
4657 62 : for (int64_t k = 0; k < array->n_children; k++)
4658 : {
4659 46 : const uint8_t *pabyValidityValues =
4660 46 : array->children[k]->null_count == 0
4661 46 : ? nullptr
4662 32 : : static_cast<const uint8_t *>(array->children[k]->buffers[0]);
4663 78 : if (!pabyValidityValues ||
4664 32 : TestBit(pabyValidityValues,
4665 32 : nIdx + static_cast<size_t>(array->children[k]->offset)))
4666 : {
4667 30 : AddToDict(oDict, schema->children[k]->name, schema->children[k],
4668 30 : array->children[k], nIdx);
4669 : }
4670 : else
4671 : {
4672 16 : oDict.AddNull(schema->children[k]->name);
4673 : }
4674 : }
4675 16 : return oDict;
4676 : }
4677 :
4678 : /************************************************************************/
4679 : /* GetObjectAsJSON() */
4680 : /************************************************************************/
4681 :
4682 104 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4683 : const struct ArrowArray *array,
4684 : const size_t nIdx)
4685 : {
4686 104 : if (IsMap(schema->format))
4687 4 : return GetMapAsJSON(schema, array, nIdx);
4688 100 : else if (IsList(schema->format))
4689 156 : return GetListAsJSON<uint32_t>(schema, array, nIdx);
4690 22 : else if (IsLargeList(schema->format))
4691 6 : return GetListAsJSON<uint64_t>(schema, array, nIdx);
4692 19 : else if (IsFixedSizeList(schema->format))
4693 6 : return GetFixedSizeListAsJSON(schema, array, nIdx);
4694 16 : else if (IsStructure(schema->format))
4695 16 : return GetStructureAsJSON(schema, array, nIdx);
4696 : else
4697 : {
4698 0 : CPLError(CE_Failure, CPLE_AppDefined,
4699 : "GetObjectAsJSON(): unhandled value format: %s",
4700 0 : schema->format);
4701 0 : return CPLJSONObject();
4702 : }
4703 : }
4704 :
4705 : /************************************************************************/
4706 : /* SetFieldForOtherFormats() */
4707 : /************************************************************************/
4708 :
4709 856 : static bool SetFieldForOtherFormats(OGRFeature &oFeature,
4710 : const int iOGRFieldIndex,
4711 : const size_t nOffsettedIndex,
4712 : const struct ArrowSchema *schema,
4713 : const struct ArrowArray *array)
4714 : {
4715 856 : const char *format = schema->format;
4716 856 : if (IsFloat16(format))
4717 : {
4718 4 : oFeature.SetField(
4719 : iOGRFieldIndex,
4720 4 : static_cast<double>(GetValueFloat16(
4721 4 : array, nOffsettedIndex - static_cast<size_t>(array->offset))));
4722 : }
4723 :
4724 852 : else if (IsFixedWidthBinary(format))
4725 : {
4726 : // Fixed width binary
4727 17 : const int nWidth = GetFixedWithBinary(format);
4728 17 : oFeature.SetField(iOGRFieldIndex, nWidth,
4729 17 : static_cast<const GByte *>(array->buffers[1]) +
4730 17 : nOffsettedIndex * nWidth);
4731 : }
4732 835 : else if (format[0] == 't' && format[1] == 'd' &&
4733 38 : format[2] == 'D') // strcmp(format, "tdD") == 0
4734 : {
4735 : // date32[days]
4736 : // number of days since Epoch
4737 33 : int64_t timestamp = static_cast<int64_t>(static_cast<const int32_t *>(
4738 33 : array->buffers[1])[nOffsettedIndex]) *
4739 : 3600 * 24;
4740 : struct tm dt;
4741 33 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4742 33 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4743 : dt.tm_mday, 0, 0, 0);
4744 33 : return true;
4745 : }
4746 802 : else if (format[0] == 't' && format[1] == 'd' &&
4747 5 : format[2] == 'm') // strcmp(format, "tdm") == 0
4748 : {
4749 : // date64[milliseconds]
4750 : // number of milliseconds since Epoch
4751 5 : int64_t timestamp =
4752 5 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex] /
4753 : 1000;
4754 : struct tm dt;
4755 5 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4756 5 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4757 5 : dt.tm_mday, 0, 0, 0);
4758 : }
4759 797 : else if (format[0] == 't' && format[1] == 't' &&
4760 39 : format[2] == 's') // strcmp(format, "tts") == 0
4761 : {
4762 : // time32 [seconds]
4763 0 : int32_t value =
4764 0 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4765 0 : const int nHour = value / 3600;
4766 0 : const int nMinute = (value / 60) % 60;
4767 0 : const int nSecond = value % 60;
4768 0 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4769 0 : static_cast<float>(nSecond));
4770 : }
4771 797 : else if (format[0] == 't' && format[1] == 't' &&
4772 39 : format[2] == 'm') // strcmp(format, "ttm") == 0
4773 : {
4774 : // time32 [milliseconds]
4775 25 : int32_t value =
4776 25 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4777 25 : double floatingPart = (value % 1000) / 1e3;
4778 25 : value /= 1000;
4779 25 : const int nHour = value / 3600;
4780 25 : const int nMinute = (value / 60) % 60;
4781 25 : const int nSecond = value % 60;
4782 25 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4783 25 : static_cast<float>(nSecond + floatingPart));
4784 : }
4785 772 : else if (format[0] == 't' && format[1] == 't' &&
4786 14 : (format[2] == 'u' || // time64 [microseconds]
4787 7 : format[2] == 'n')) // time64 [nanoseconds]
4788 : {
4789 14 : int64_t value =
4790 14 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex];
4791 14 : if (oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() == OFTInteger64)
4792 : {
4793 2 : oFeature.SetField(iOGRFieldIndex, static_cast<GIntBig>(value));
4794 : }
4795 : else
4796 : {
4797 : double floatingPart;
4798 12 : if (format[2] == 'u')
4799 : {
4800 5 : floatingPart = (value % (1000 * 1000)) / 1e6;
4801 5 : value /= 1000 * 1000;
4802 : }
4803 : else
4804 : {
4805 7 : floatingPart = (value % (1000 * 1000 * 1000)) / 1e9;
4806 7 : value /= 1000 * 1000 * 1000;
4807 : }
4808 12 : const int nHour = static_cast<int>(value / 3600);
4809 12 : const int nMinute = static_cast<int>((value / 60) % 60);
4810 12 : const int nSecond = static_cast<int>(value % 60);
4811 12 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4812 12 : static_cast<float>(nSecond + floatingPart));
4813 14 : }
4814 : }
4815 758 : else if (IsTimestampSeconds(format))
4816 : {
4817 0 : ArrowTimestampToOGRDateTime(
4818 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex], 1,
4819 : GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4820 : }
4821 758 : else if (IsTimestampMilliseconds(format))
4822 : {
4823 73 : ArrowTimestampToOGRDateTime(
4824 73 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4825 : 1000, GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4826 : }
4827 685 : else if (IsTimestampMicroseconds(format))
4828 : {
4829 34 : ArrowTimestampToOGRDateTime(
4830 34 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4831 : 1000 * 1000, GetTimestampTimezone(format), oFeature,
4832 : iOGRFieldIndex);
4833 : }
4834 651 : else if (IsTimestampNanoseconds(format))
4835 : {
4836 0 : ArrowTimestampToOGRDateTime(
4837 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4838 : 1000 * 1000 * 1000, GetTimestampTimezone(format), oFeature,
4839 : iOGRFieldIndex);
4840 : }
4841 651 : else if (IsFixedSizeList(format))
4842 : {
4843 154 : const int nItems = GetFixedSizeList(format);
4844 154 : const auto childArray = array->children[0];
4845 154 : const char *childFormat = schema->children[0]->format;
4846 154 : if (IsBoolean(childFormat))
4847 : {
4848 24 : std::vector<int> aValues;
4849 12 : const auto *paValues =
4850 12 : static_cast<const uint8_t *>(childArray->buffers[1]);
4851 36 : for (int i = 0; i < nItems; ++i)
4852 : {
4853 24 : aValues.push_back(
4854 24 : TestBit(paValues,
4855 24 : static_cast<size_t>(childArray->offset +
4856 24 : nOffsettedIndex * nItems + i))
4857 24 : ? 1
4858 : : 0);
4859 : }
4860 12 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4861 12 : aValues.data());
4862 : }
4863 142 : else if (IsInt8(childFormat))
4864 : {
4865 12 : FillFieldFixedSizeList<int8_t, int>(array, iOGRFieldIndex,
4866 : nOffsettedIndex, nItems,
4867 : childArray, oFeature);
4868 : }
4869 130 : else if (IsUInt8(childFormat))
4870 : {
4871 12 : FillFieldFixedSizeList<uint8_t, int>(array, iOGRFieldIndex,
4872 : nOffsettedIndex, nItems,
4873 : childArray, oFeature);
4874 : }
4875 118 : else if (IsInt16(childFormat))
4876 : {
4877 12 : FillFieldFixedSizeList<int16_t, int>(array, iOGRFieldIndex,
4878 : nOffsettedIndex, nItems,
4879 : childArray, oFeature);
4880 : }
4881 106 : else if (IsUInt16(childFormat))
4882 : {
4883 12 : FillFieldFixedSizeList<uint16_t, int>(array, iOGRFieldIndex,
4884 : nOffsettedIndex, nItems,
4885 : childArray, oFeature);
4886 : }
4887 94 : else if (IsInt32(childFormat))
4888 : {
4889 12 : FillFieldFixedSizeList<int32_t, int>(array, iOGRFieldIndex,
4890 : nOffsettedIndex, nItems,
4891 : childArray, oFeature);
4892 : }
4893 82 : else if (IsUInt32(childFormat))
4894 : {
4895 5 : FillFieldFixedSizeList<uint32_t, GIntBig>(array, iOGRFieldIndex,
4896 : nOffsettedIndex, nItems,
4897 : childArray, oFeature);
4898 : }
4899 77 : else if (IsInt64(childFormat))
4900 : {
4901 19 : FillFieldFixedSizeList<int64_t, GIntBig>(array, iOGRFieldIndex,
4902 : nOffsettedIndex, nItems,
4903 : childArray, oFeature);
4904 : }
4905 58 : else if (IsUInt64(childFormat))
4906 : {
4907 12 : FillFieldFixedSizeList<uint64_t, double>(array, iOGRFieldIndex,
4908 : nOffsettedIndex, nItems,
4909 : childArray, oFeature);
4910 : }
4911 46 : else if (IsFloat16(childFormat))
4912 : {
4913 10 : std::vector<double> aValues;
4914 15 : for (int i = 0; i < nItems; ++i)
4915 : {
4916 10 : aValues.push_back(static_cast<double>(
4917 10 : GetValueFloat16(childArray, nOffsettedIndex * nItems + i)));
4918 : }
4919 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4920 5 : aValues.data());
4921 : }
4922 41 : else if (IsFloat32(childFormat))
4923 : {
4924 12 : FillFieldFixedSizeList<float, double>(array, iOGRFieldIndex,
4925 : nOffsettedIndex, nItems,
4926 : childArray, oFeature);
4927 : }
4928 29 : else if (IsFloat64(childFormat))
4929 : {
4930 12 : FillFieldFixedSizeList<double, double>(array, iOGRFieldIndex,
4931 : nOffsettedIndex, nItems,
4932 : childArray, oFeature);
4933 : }
4934 17 : else if (IsString(childFormat))
4935 : {
4936 12 : FillFieldFixedSizeListString<uint32_t>(array, iOGRFieldIndex,
4937 : nOffsettedIndex, nItems,
4938 : childArray, oFeature);
4939 : }
4940 5 : else if (IsLargeString(childFormat))
4941 : {
4942 5 : FillFieldFixedSizeListString<uint64_t>(array, iOGRFieldIndex,
4943 : nOffsettedIndex, nItems,
4944 : childArray, oFeature);
4945 : }
4946 0 : else if (IsStringView(childFormat))
4947 : {
4948 0 : FillFieldFixedSizeListStringView(array, iOGRFieldIndex,
4949 : nOffsettedIndex, nItems,
4950 : childArray, oFeature);
4951 : }
4952 : }
4953 497 : else if (IsList(format) || IsLargeList(format))
4954 : {
4955 254 : const auto childArray = array->children[0];
4956 254 : const char *childFormat = schema->children[0]->format;
4957 254 : if (IsBoolean(childFormat))
4958 : {
4959 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4960 12 : FillFieldListFromBool<uint32_t>(array, iOGRFieldIndex,
4961 : nOffsettedIndex, childArray,
4962 : oFeature);
4963 : else
4964 4 : FillFieldListFromBool<uint64_t>(array, iOGRFieldIndex,
4965 : nOffsettedIndex, childArray,
4966 : oFeature);
4967 : }
4968 238 : else if (IsInt8(childFormat))
4969 : {
4970 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4971 10 : FillFieldList<uint32_t, int8_t, int>(array, iOGRFieldIndex,
4972 : nOffsettedIndex,
4973 : childArray, oFeature);
4974 : else
4975 4 : FillFieldList<uint64_t, int8_t, int>(array, iOGRFieldIndex,
4976 : nOffsettedIndex,
4977 : childArray, oFeature);
4978 : }
4979 224 : else if (IsUInt8(childFormat))
4980 : {
4981 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4982 14 : FillFieldList<uint32_t, uint8_t, int>(array, iOGRFieldIndex,
4983 : nOffsettedIndex,
4984 : childArray, oFeature);
4985 : else
4986 4 : FillFieldList<uint64_t, uint8_t, int>(array, iOGRFieldIndex,
4987 : nOffsettedIndex,
4988 : childArray, oFeature);
4989 : }
4990 206 : else if (IsInt16(childFormat))
4991 : {
4992 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4993 12 : FillFieldList<uint32_t, int16_t, int>(array, iOGRFieldIndex,
4994 : nOffsettedIndex,
4995 : childArray, oFeature);
4996 : else
4997 4 : FillFieldList<uint64_t, int16_t, int>(array, iOGRFieldIndex,
4998 : nOffsettedIndex,
4999 : childArray, oFeature);
5000 : }
5001 190 : else if (IsUInt16(childFormat))
5002 : {
5003 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
5004 10 : FillFieldList<uint32_t, uint16_t, int>(array, iOGRFieldIndex,
5005 : nOffsettedIndex,
5006 : childArray, oFeature);
5007 : else
5008 4 : FillFieldList<uint64_t, uint16_t, int>(array, iOGRFieldIndex,
5009 : nOffsettedIndex,
5010 : childArray, oFeature);
5011 : }
5012 176 : else if (IsInt32(childFormat))
5013 : {
5014 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
5015 14 : FillFieldList<uint32_t, int32_t, int>(array, iOGRFieldIndex,
5016 : nOffsettedIndex,
5017 : childArray, oFeature);
5018 : else
5019 4 : FillFieldList<uint64_t, int32_t, int>(array, iOGRFieldIndex,
5020 : nOffsettedIndex,
5021 : childArray, oFeature);
5022 : }
5023 158 : else if (IsUInt32(childFormat))
5024 : {
5025 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
5026 4 : FillFieldList<uint32_t, uint32_t, GIntBig>(
5027 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5028 : oFeature);
5029 : else
5030 4 : FillFieldList<uint64_t, uint32_t, GIntBig>(
5031 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5032 : oFeature);
5033 : }
5034 150 : else if (IsInt64(childFormat))
5035 : {
5036 31 : if (format[1] == ARROW_2ND_LETTER_LIST)
5037 27 : FillFieldList<uint32_t, int64_t, GIntBig>(array, iOGRFieldIndex,
5038 : nOffsettedIndex,
5039 : childArray, oFeature);
5040 : else
5041 4 : FillFieldList<uint64_t, int64_t, GIntBig>(array, iOGRFieldIndex,
5042 : nOffsettedIndex,
5043 : childArray, oFeature);
5044 : }
5045 119 : else if (IsUInt64(childFormat)) // (lossy conversion)
5046 : {
5047 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
5048 10 : FillFieldList<uint32_t, uint64_t, double>(array, iOGRFieldIndex,
5049 : nOffsettedIndex,
5050 : childArray, oFeature);
5051 : else
5052 4 : FillFieldList<uint64_t, uint64_t, double>(array, iOGRFieldIndex,
5053 : nOffsettedIndex,
5054 : childArray, oFeature);
5055 : }
5056 105 : else if (IsFloat16(childFormat))
5057 : {
5058 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
5059 4 : FillFieldListFromHalfFloat<uint32_t>(array, iOGRFieldIndex,
5060 : nOffsettedIndex,
5061 : childArray, oFeature);
5062 : else
5063 4 : FillFieldListFromHalfFloat<uint64_t>(array, iOGRFieldIndex,
5064 : nOffsettedIndex,
5065 : childArray, oFeature);
5066 : }
5067 97 : else if (IsFloat32(childFormat))
5068 : {
5069 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
5070 12 : FillFieldList<uint32_t, float, double>(array, iOGRFieldIndex,
5071 : nOffsettedIndex,
5072 : childArray, oFeature);
5073 : else
5074 4 : FillFieldList<uint64_t, float, double>(array, iOGRFieldIndex,
5075 : nOffsettedIndex,
5076 : childArray, oFeature);
5077 : }
5078 81 : else if (IsFloat64(childFormat))
5079 : {
5080 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
5081 14 : FillFieldList<uint32_t, double, double>(array, iOGRFieldIndex,
5082 : nOffsettedIndex,
5083 : childArray, oFeature);
5084 : else
5085 4 : FillFieldList<uint64_t, double, double>(array, iOGRFieldIndex,
5086 : nOffsettedIndex,
5087 : childArray, oFeature);
5088 : }
5089 63 : else if (IsString(childFormat))
5090 : {
5091 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
5092 14 : FillFieldListFromString<uint32_t, uint32_t>(
5093 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5094 : oFeature);
5095 : else
5096 4 : FillFieldListFromString<uint64_t, uint32_t>(
5097 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5098 : oFeature);
5099 : }
5100 45 : else if (IsLargeString(childFormat))
5101 : {
5102 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
5103 10 : FillFieldListFromString<uint32_t, uint64_t>(
5104 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5105 : oFeature);
5106 : else
5107 4 : FillFieldListFromString<uint64_t, uint64_t>(
5108 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
5109 : oFeature);
5110 : }
5111 31 : else if (IsStringView(childFormat))
5112 : {
5113 0 : if (format[1] == ARROW_2ND_LETTER_LIST)
5114 0 : FillFieldListFromStringView<uint32_t>(array, iOGRFieldIndex,
5115 : nOffsettedIndex,
5116 : childArray, oFeature);
5117 : else
5118 0 : FillFieldListFromStringView<uint64_t>(array, iOGRFieldIndex,
5119 : nOffsettedIndex,
5120 : childArray, oFeature);
5121 : }
5122 :
5123 31 : else if (format[1] == ARROW_2ND_LETTER_LIST)
5124 : {
5125 31 : const size_t iFeature =
5126 31 : static_cast<size_t>(nOffsettedIndex - array->offset);
5127 31 : oFeature.SetField(iOGRFieldIndex,
5128 62 : GetListAsJSON<uint32_t>(schema, array, iFeature)
5129 62 : .Format(CPLJSONObject::PrettyFormat::Plain)
5130 : .c_str());
5131 : }
5132 : else
5133 : {
5134 0 : const size_t iFeature =
5135 0 : static_cast<size_t>(nOffsettedIndex - array->offset);
5136 0 : oFeature.SetField(iOGRFieldIndex,
5137 0 : GetListAsJSON<uint64_t>(schema, array, iFeature)
5138 0 : .Format(CPLJSONObject::PrettyFormat::Plain)
5139 : .c_str());
5140 : }
5141 : }
5142 243 : else if (IsDecimal(format))
5143 : {
5144 4 : int nPrecision = 0;
5145 4 : int nScale = 0;
5146 4 : int nWidthInBytes = 0;
5147 4 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
5148 : {
5149 0 : CPLAssert(false);
5150 : }
5151 :
5152 : // fits on a int64
5153 4 : CPLAssert(nPrecision <= 19);
5154 : // either 128 or 256 bits
5155 4 : CPLAssert((nWidthInBytes % 8) == 0);
5156 4 : const int nWidthIn64BitWord = nWidthInBytes / 8;
5157 4 : const size_t iFeature =
5158 4 : static_cast<size_t>(nOffsettedIndex - array->offset);
5159 4 : oFeature.SetField(
5160 : iOGRFieldIndex,
5161 : GetValueDecimal(array, nWidthIn64BitWord, nScale, iFeature));
5162 4 : return true;
5163 : }
5164 239 : else if (IsMap(format))
5165 : {
5166 239 : const size_t iFeature =
5167 239 : static_cast<size_t>(nOffsettedIndex - array->offset);
5168 239 : oFeature.SetField(iOGRFieldIndex,
5169 478 : GetMapAsJSON(schema, array, iFeature)
5170 478 : .Format(CPLJSONObject::PrettyFormat::Plain)
5171 : .c_str());
5172 : }
5173 : else
5174 : {
5175 0 : return false;
5176 : }
5177 :
5178 819 : return true;
5179 : }
5180 :
5181 : /************************************************************************/
5182 : /* FillValidityArrayFromAttrQuery() */
5183 : /************************************************************************/
5184 :
5185 134 : static size_t FillValidityArrayFromAttrQuery(
5186 : const OGRLayer *poLayer, OGRFeatureQuery *poAttrQuery,
5187 : const struct ArrowSchema *schema, struct ArrowArray *array,
5188 : std::vector<bool> &abyValidityFromFilters, CSLConstList papszOptions)
5189 : {
5190 134 : size_t nCountIntersecting = 0;
5191 134 : auto poFeatureDefn = const_cast<OGRLayer *>(poLayer)->GetLayerDefn();
5192 268 : OGRFeature oFeature(poFeatureDefn);
5193 :
5194 268 : std::map<std::string, std::vector<int>> oMapFieldNameToArrowPath;
5195 268 : std::vector<int> anArrowPathTmp;
5196 134 : BuildMapFieldNameToArrowPath(schema, oMapFieldNameToArrowPath,
5197 268 : std::string(), anArrowPathTmp);
5198 :
5199 : struct UsedFieldsInfo
5200 : {
5201 : int iOGRFieldIndex{};
5202 : std::vector<int> anArrowPath{};
5203 : };
5204 :
5205 268 : std::vector<UsedFieldsInfo> aoUsedFieldsInfo;
5206 :
5207 134 : bool bNeedsFID = false;
5208 268 : const CPLStringList aosUsedFields(poAttrQuery->GetUsedFields());
5209 252 : for (int i = 0; i < aosUsedFields.size(); ++i)
5210 : {
5211 118 : int iOGRFieldIndex = poFeatureDefn->GetFieldIndex(aosUsedFields[i]);
5212 118 : if (iOGRFieldIndex >= 0)
5213 : {
5214 112 : const auto oIter = oMapFieldNameToArrowPath.find(aosUsedFields[i]);
5215 112 : if (oIter != oMapFieldNameToArrowPath.end())
5216 : {
5217 224 : UsedFieldsInfo info;
5218 112 : info.iOGRFieldIndex = iOGRFieldIndex;
5219 112 : info.anArrowPath = oIter->second;
5220 112 : aoUsedFieldsInfo.push_back(std::move(info));
5221 : }
5222 : else
5223 : {
5224 0 : CPLError(CE_Failure, CPLE_AppDefined,
5225 : "Cannot find %s in oMapFieldNameToArrowPath",
5226 : aosUsedFields[i]);
5227 : }
5228 : }
5229 6 : else if (EQUAL(aosUsedFields[i], "FID"))
5230 : {
5231 6 : bNeedsFID = true;
5232 : }
5233 : else
5234 : {
5235 0 : CPLDebug("OGR", "Cannot find used field %s", aosUsedFields[i]);
5236 : }
5237 : }
5238 :
5239 134 : const size_t nLength = abyValidityFromFilters.size();
5240 :
5241 134 : GIntBig nBaseSeqFID = -1;
5242 268 : std::vector<int> anArrowPathToFIDColumn;
5243 134 : if (bNeedsFID)
5244 : {
5245 : // BASE_SEQUENTIAL_FID is set when there is no Arrow column for the FID
5246 : // and we assume sequential FID numbering
5247 : const char *pszBaseSeqFID =
5248 6 : CSLFetchNameValue(papszOptions, "BASE_SEQUENTIAL_FID");
5249 6 : if (pszBaseSeqFID)
5250 : {
5251 5 : nBaseSeqFID = CPLAtoGIntBig(pszBaseSeqFID);
5252 :
5253 : // Optimizimation for "FID = constant"
5254 : swq_expr_node *poNode =
5255 5 : static_cast<swq_expr_node *>(poAttrQuery->GetSWQExpr());
5256 15 : if (poNode->eNodeType == SNT_OPERATION &&
5257 5 : poNode->nOperation == SWQ_EQ && poNode->nSubExprCount == 2 &&
5258 2 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
5259 2 : poNode->papoSubExpr[1]->eNodeType == SNT_CONSTANT &&
5260 2 : poNode->papoSubExpr[0]->field_index ==
5261 12 : poFeatureDefn->GetFieldCount() + SPF_FID &&
5262 2 : poNode->papoSubExpr[1]->field_type == SWQ_INTEGER64)
5263 : {
5264 2 : if (nBaseSeqFID + static_cast<int64_t>(nLength) <
5265 2 : poNode->papoSubExpr[1]->int_value ||
5266 2 : nBaseSeqFID > poNode->papoSubExpr[1]->int_value)
5267 : {
5268 0 : return 0;
5269 : }
5270 : }
5271 : }
5272 : else
5273 : {
5274 : const char *pszFIDColumn =
5275 1 : const_cast<OGRLayer *>(poLayer)->GetFIDColumn();
5276 1 : if (pszFIDColumn && pszFIDColumn[0])
5277 : {
5278 1 : const auto oIter = oMapFieldNameToArrowPath.find(pszFIDColumn);
5279 1 : if (oIter != oMapFieldNameToArrowPath.end())
5280 : {
5281 1 : anArrowPathToFIDColumn = oIter->second;
5282 : }
5283 : }
5284 1 : if (anArrowPathToFIDColumn.empty())
5285 : {
5286 0 : CPLError(CE_Failure, CPLE_AppDefined,
5287 : "Filtering on FID requested but cannot associate a "
5288 : "FID with Arrow records");
5289 : }
5290 : }
5291 : }
5292 :
5293 555 : for (size_t iRow = 0; iRow < nLength; ++iRow)
5294 : {
5295 421 : if (!abyValidityFromFilters[iRow])
5296 2 : continue;
5297 :
5298 419 : if (bNeedsFID)
5299 : {
5300 21 : if (nBaseSeqFID >= 0)
5301 : {
5302 11 : oFeature.SetFID(nBaseSeqFID + iRow);
5303 : }
5304 10 : else if (!anArrowPathToFIDColumn.empty())
5305 : {
5306 10 : oFeature.SetFID(OGRNullFID);
5307 :
5308 10 : const struct ArrowSchema *psSchemaField = schema;
5309 10 : const struct ArrowArray *psArray = array;
5310 10 : bool bSkip = false;
5311 20 : for (size_t i = 0; i < anArrowPathToFIDColumn.size(); ++i)
5312 : {
5313 10 : const int iChild = anArrowPathToFIDColumn[i];
5314 10 : if (i > 0)
5315 : {
5316 0 : const uint8_t *pabyValidity =
5317 0 : psArray->null_count == 0
5318 0 : ? nullptr
5319 : : static_cast<uint8_t *>(
5320 0 : const_cast<void *>(psArray->buffers[0]));
5321 0 : const size_t nOffsettedIndex =
5322 0 : static_cast<size_t>(iRow + psArray->offset);
5323 0 : if (pabyValidity &&
5324 0 : !TestBit(pabyValidity, nOffsettedIndex))
5325 : {
5326 0 : bSkip = true;
5327 0 : break;
5328 : }
5329 : }
5330 :
5331 10 : psSchemaField = psSchemaField->children[iChild];
5332 10 : psArray = psArray->children[iChild];
5333 : }
5334 10 : if (bSkip)
5335 0 : continue;
5336 :
5337 10 : const char *format = psSchemaField->format;
5338 10 : const uint8_t *pabyValidity =
5339 10 : psArray->null_count == 0
5340 10 : ? nullptr
5341 : : static_cast<uint8_t *>(
5342 0 : const_cast<void *>(psArray->buffers[0]));
5343 10 : const size_t nOffsettedIndex =
5344 10 : static_cast<size_t>(iRow + psArray->offset);
5345 10 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5346 : {
5347 : // do nothing
5348 : }
5349 10 : else if (IsInt32(format))
5350 : {
5351 0 : oFeature.SetFID(static_cast<const int32_t *>(
5352 0 : psArray->buffers[1])[nOffsettedIndex]);
5353 : }
5354 10 : else if (IsInt64(format))
5355 : {
5356 10 : oFeature.SetFID(static_cast<const int64_t *>(
5357 10 : psArray->buffers[1])[nOffsettedIndex]);
5358 : }
5359 : }
5360 : }
5361 :
5362 725 : for (const auto &sInfo : aoUsedFieldsInfo)
5363 : {
5364 306 : const int iOGRFieldIndex = sInfo.iOGRFieldIndex;
5365 306 : const struct ArrowSchema *psSchemaField = schema;
5366 306 : const struct ArrowArray *psArray = array;
5367 306 : bool bSkip = false;
5368 612 : for (size_t i = 0; i < sInfo.anArrowPath.size(); ++i)
5369 : {
5370 306 : const int iChild = sInfo.anArrowPath[i];
5371 306 : if (i > 0)
5372 : {
5373 0 : const uint8_t *pabyValidity =
5374 0 : psArray->null_count == 0
5375 0 : ? nullptr
5376 : : static_cast<uint8_t *>(
5377 0 : const_cast<void *>(psArray->buffers[0]));
5378 0 : const size_t nOffsettedIndex =
5379 0 : static_cast<size_t>(iRow + psArray->offset);
5380 0 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5381 : {
5382 0 : bSkip = true;
5383 0 : oFeature.SetFieldNull(iOGRFieldIndex);
5384 0 : break;
5385 : }
5386 : }
5387 :
5388 306 : psSchemaField = psSchemaField->children[iChild];
5389 306 : psArray = psArray->children[iChild];
5390 : }
5391 306 : if (bSkip)
5392 0 : continue;
5393 :
5394 306 : const char *format = psSchemaField->format;
5395 306 : const uint8_t *pabyValidity =
5396 306 : psArray->null_count == 0
5397 306 : ? nullptr
5398 : : static_cast<uint8_t *>(
5399 129 : const_cast<void *>(psArray->buffers[0]));
5400 306 : const size_t nOffsettedIndex =
5401 306 : static_cast<size_t>(iRow + psArray->offset);
5402 306 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5403 : {
5404 38 : oFeature.SetFieldNull(iOGRFieldIndex);
5405 : }
5406 268 : else if (IsBoolean(format))
5407 : {
5408 78 : oFeature.SetField(
5409 : iOGRFieldIndex,
5410 78 : TestBit(static_cast<const uint8_t *>(psArray->buffers[1]),
5411 : nOffsettedIndex));
5412 : }
5413 190 : else if (IsInt8(format))
5414 : {
5415 8 : oFeature.SetField(iOGRFieldIndex,
5416 8 : static_cast<const int8_t *>(
5417 8 : psArray->buffers[1])[nOffsettedIndex]);
5418 : }
5419 182 : else if (IsUInt8(format))
5420 : {
5421 4 : oFeature.SetField(iOGRFieldIndex,
5422 4 : static_cast<const uint8_t *>(
5423 4 : psArray->buffers[1])[nOffsettedIndex]);
5424 : }
5425 178 : else if (IsInt16(format))
5426 : {
5427 16 : oFeature.SetField(iOGRFieldIndex,
5428 16 : static_cast<const int16_t *>(
5429 16 : psArray->buffers[1])[nOffsettedIndex]);
5430 : }
5431 162 : else if (IsUInt16(format))
5432 : {
5433 2 : oFeature.SetField(iOGRFieldIndex,
5434 2 : static_cast<const uint16_t *>(
5435 2 : psArray->buffers[1])[nOffsettedIndex]);
5436 : }
5437 160 : else if (IsInt32(format))
5438 : {
5439 10 : oFeature.SetField(iOGRFieldIndex,
5440 10 : static_cast<const int32_t *>(
5441 10 : psArray->buffers[1])[nOffsettedIndex]);
5442 : }
5443 150 : else if (IsUInt32(format))
5444 : {
5445 0 : oFeature.SetField(
5446 : iOGRFieldIndex,
5447 0 : static_cast<GIntBig>(static_cast<const uint32_t *>(
5448 0 : psArray->buffers[1])[nOffsettedIndex]));
5449 : }
5450 150 : else if (IsInt64(format))
5451 : {
5452 4 : oFeature.SetField(
5453 : iOGRFieldIndex,
5454 4 : static_cast<GIntBig>(static_cast<const int64_t *>(
5455 4 : psArray->buffers[1])[nOffsettedIndex]));
5456 : }
5457 146 : else if (IsUInt64(format))
5458 : {
5459 4 : oFeature.SetField(
5460 : iOGRFieldIndex,
5461 4 : static_cast<double>(static_cast<const uint64_t *>(
5462 4 : psArray->buffers[1])[nOffsettedIndex]));
5463 : }
5464 142 : else if (IsFloat32(format))
5465 : {
5466 2 : oFeature.SetField(
5467 : iOGRFieldIndex,
5468 2 : static_cast<double>(static_cast<const float *>(
5469 2 : psArray->buffers[1])[nOffsettedIndex]));
5470 : }
5471 140 : else if (IsFloat64(format))
5472 : {
5473 26 : oFeature.SetField(iOGRFieldIndex,
5474 26 : static_cast<const double *>(
5475 26 : psArray->buffers[1])[nOffsettedIndex]);
5476 : }
5477 114 : else if (IsString(format))
5478 : {
5479 18 : const auto nOffset = static_cast<const uint32_t *>(
5480 18 : psArray->buffers[1])[nOffsettedIndex];
5481 18 : const auto nNextOffset = static_cast<const uint32_t *>(
5482 18 : psArray->buffers[1])[nOffsettedIndex + 1];
5483 18 : const GByte *pabyData =
5484 18 : static_cast<const GByte *>(psArray->buffers[2]);
5485 18 : const uint32_t nSize = nNextOffset - nOffset;
5486 18 : CPLAssert(oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() ==
5487 : OFTString);
5488 18 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5489 18 : memcpy(pszStr, pabyData + nOffset, nSize);
5490 18 : pszStr[nSize] = 0;
5491 18 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5492 18 : if (IsValidField(psField))
5493 12 : CPLFree(psField->String);
5494 18 : psField->String = pszStr;
5495 : }
5496 96 : else if (IsStringView(format))
5497 : {
5498 : // Cf https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout
5499 0 : CPLAssert(oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() ==
5500 : OFTString);
5501 0 : const auto strView = GetStringView(psArray, iRow);
5502 0 : const auto nSize = strView.size();
5503 0 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5504 0 : memcpy(pszStr, strView.data(), nSize);
5505 0 : pszStr[nSize] = 0;
5506 0 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5507 0 : if (IsValidField(psField))
5508 0 : CPLFree(psField->String);
5509 0 : psField->String = pszStr;
5510 : }
5511 96 : else if (IsLargeString(format))
5512 : {
5513 6 : const auto nOffset = static_cast<const uint64_t *>(
5514 6 : psArray->buffers[1])[nOffsettedIndex];
5515 6 : const auto nNextOffset = static_cast<const uint64_t *>(
5516 6 : psArray->buffers[1])[nOffsettedIndex + 1];
5517 6 : const GByte *pabyData =
5518 6 : static_cast<const GByte *>(psArray->buffers[2]);
5519 6 : const size_t nSize = static_cast<size_t>(nNextOffset - nOffset);
5520 6 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5521 6 : memcpy(pszStr, pabyData + static_cast<size_t>(nOffset), nSize);
5522 6 : pszStr[nSize] = 0;
5523 6 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5524 6 : if (IsValidField(psField))
5525 3 : CPLFree(psField->String);
5526 6 : psField->String = pszStr;
5527 : }
5528 90 : else if (IsBinary(format))
5529 : {
5530 5 : const auto nOffset = static_cast<const uint32_t *>(
5531 5 : psArray->buffers[1])[nOffsettedIndex];
5532 5 : const auto nNextOffset = static_cast<const uint32_t *>(
5533 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5534 5 : const GByte *pabyData =
5535 5 : static_cast<const GByte *>(psArray->buffers[2]);
5536 5 : const uint32_t nSize = nNextOffset - nOffset;
5537 10 : if (nSize >
5538 5 : static_cast<size_t>(std::numeric_limits<int32_t>::max()))
5539 : {
5540 0 : abyValidityFromFilters.clear();
5541 0 : abyValidityFromFilters.resize(nLength);
5542 0 : CPLError(CE_Failure, CPLE_AppDefined,
5543 : "Unexpected error in PostFilterArrowArray(): too "
5544 : "large binary");
5545 0 : return 0;
5546 : }
5547 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5548 5 : pabyData + nOffset);
5549 : }
5550 85 : else if (IsLargeBinary(format))
5551 : {
5552 5 : const auto nOffset = static_cast<const uint64_t *>(
5553 5 : psArray->buffers[1])[nOffsettedIndex];
5554 5 : const auto nNextOffset = static_cast<const uint64_t *>(
5555 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5556 5 : const GByte *pabyData =
5557 5 : static_cast<const GByte *>(psArray->buffers[2]);
5558 5 : const uint64_t nSize = nNextOffset - nOffset;
5559 5 : if (nSize >
5560 5 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
5561 : {
5562 0 : abyValidityFromFilters.clear();
5563 0 : abyValidityFromFilters.resize(nLength);
5564 0 : CPLError(CE_Failure, CPLE_AppDefined,
5565 : "Unexpected error in PostFilterArrowArray(): too "
5566 : "large binary");
5567 0 : return 0;
5568 : }
5569 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5570 5 : pabyData + nOffset);
5571 : }
5572 80 : else if (!SetFieldForOtherFormats(oFeature, iOGRFieldIndex,
5573 : nOffsettedIndex, psSchemaField,
5574 : psArray))
5575 : {
5576 0 : abyValidityFromFilters.clear();
5577 0 : abyValidityFromFilters.resize(nLength);
5578 0 : CPLError(
5579 : CE_Failure, CPLE_AppDefined,
5580 : "Unexpected error in PostFilterArrowArray(): unhandled "
5581 : "field format: %s",
5582 : format);
5583 0 : return 0;
5584 : }
5585 : }
5586 419 : if (poAttrQuery->Evaluate(&oFeature))
5587 : {
5588 215 : nCountIntersecting++;
5589 : }
5590 : else
5591 : {
5592 204 : abyValidityFromFilters[iRow] = false;
5593 : }
5594 : }
5595 134 : return nCountIntersecting;
5596 : }
5597 :
5598 : /************************************************************************/
5599 : /* OGRLayer::PostFilterArrowArray() */
5600 : /************************************************************************/
5601 :
5602 : /** Remove rows that aren't selected by the spatial or attribute filter.
5603 : *
5604 : * Assumes that CanPostFilterArrowArray() has been called and returned true.
5605 : */
5606 153 : void OGRLayer::PostFilterArrowArray(const struct ArrowSchema *schema,
5607 : struct ArrowArray *array,
5608 : CSLConstList papszOptions) const
5609 : {
5610 153 : if (!m_poFilterGeom && !m_poAttrQuery)
5611 43 : return;
5612 :
5613 153 : CPLAssert(schema->n_children == array->n_children);
5614 :
5615 153 : int64_t iGeomField = -1;
5616 153 : if (m_poFilterGeom)
5617 : {
5618 : const char *pszGeomFieldName =
5619 : const_cast<OGRLayer *>(this)
5620 21 : ->GetLayerDefn()
5621 21 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
5622 21 : ->GetNameRef();
5623 837 : for (int64_t iField = 0; iField < schema->n_children; ++iField)
5624 : {
5625 837 : const auto fieldSchema = schema->children[iField];
5626 837 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
5627 : {
5628 21 : iGeomField = iField;
5629 21 : break;
5630 : }
5631 816 : CPLAssert(array->children[iField]->length ==
5632 : array->children[0]->length);
5633 : }
5634 : // Guaranteed if CanPostFilterArrowArray() returned true
5635 21 : CPLAssert(iGeomField >= 0);
5636 21 : CPLAssert(IsBinary(schema->children[iGeomField]->format) ||
5637 : IsLargeBinary(schema->children[iGeomField]->format));
5638 21 : CPLAssert(array->children[iGeomField]->n_buffers == 3);
5639 : }
5640 :
5641 153 : std::vector<bool> abyValidityFromFilters;
5642 153 : const size_t nLength = static_cast<size_t>(array->length);
5643 : const size_t nCountIntersectingGeom =
5644 174 : m_poFilterGeom ? (IsBinary(schema->children[iGeomField]->format)
5645 42 : ? FillValidityArrayFromWKBArray<uint32_t>(
5646 21 : array->children[iGeomField], this,
5647 : abyValidityFromFilters)
5648 0 : : FillValidityArrayFromWKBArray<uint64_t>(
5649 0 : array->children[iGeomField], this,
5650 : abyValidityFromFilters))
5651 153 : : nLength;
5652 153 : if (!m_poFilterGeom)
5653 132 : abyValidityFromFilters.resize(nLength, true);
5654 : const size_t nCountIntersecting =
5655 134 : m_poAttrQuery && nCountIntersectingGeom > 0
5656 306 : ? FillValidityArrayFromAttrQuery(this, m_poAttrQuery, schema, array,
5657 : abyValidityFromFilters,
5658 : papszOptions)
5659 19 : : m_poFilterGeom ? nCountIntersectingGeom
5660 153 : : nLength;
5661 : // Nothing to do ?
5662 153 : if (nCountIntersecting == nLength)
5663 : {
5664 : // CPLDebug("OGR", "All rows match filter");
5665 43 : return;
5666 : }
5667 :
5668 110 : if (nCountIntersecting == 0)
5669 : {
5670 27 : array->length = 0;
5671 : }
5672 83 : else if (!CompactStructArray(schema, array, 0, abyValidityFromFilters,
5673 : nCountIntersecting))
5674 : {
5675 0 : array->release(array);
5676 0 : memset(array, 0, sizeof(*array));
5677 : }
5678 : }
5679 :
5680 : /************************************************************************/
5681 : /* OGRCloneArrowArray */
5682 : /************************************************************************/
5683 :
5684 14093 : static bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5685 : const struct ArrowArray *src_array,
5686 : struct ArrowArray *out_array,
5687 : size_t nParentOffset)
5688 : {
5689 14093 : memset(out_array, 0, sizeof(*out_array));
5690 14093 : const size_t nLength =
5691 14093 : static_cast<size_t>(src_array->length) - nParentOffset;
5692 14093 : out_array->length = nLength;
5693 14093 : out_array->null_count = src_array->null_count;
5694 14093 : out_array->release = OGRLayerDefaultReleaseArray;
5695 :
5696 14093 : bool bRet = true;
5697 :
5698 14093 : out_array->n_buffers = src_array->n_buffers;
5699 28186 : out_array->buffers = static_cast<const void **>(CPLCalloc(
5700 14093 : static_cast<size_t>(src_array->n_buffers), sizeof(const void *)));
5701 14093 : CPLAssert(static_cast<size_t>(src_array->length) >= nParentOffset);
5702 14093 : const char *format = schema->format;
5703 14093 : const auto nOffset = static_cast<size_t>(src_array->offset) + nParentOffset;
5704 41917 : for (int64_t i = 0; i < src_array->n_buffers; ++i)
5705 : {
5706 27824 : if (i == 0 || IsBoolean(format))
5707 : {
5708 14464 : if (i == 1)
5709 : {
5710 371 : CPLAssert(src_array->buffers[i]);
5711 : }
5712 14464 : if (src_array->buffers[i])
5713 : {
5714 8911 : const size_t nBytes = nLength ? (nLength + 7) / 8 : 1;
5715 : uint8_t *CPL_RESTRICT p = static_cast<uint8_t *>(
5716 8911 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nBytes));
5717 8911 : if (!p)
5718 : {
5719 0 : bRet = false;
5720 0 : break;
5721 : }
5722 8911 : const auto *CPL_RESTRICT pSrcArray =
5723 8911 : static_cast<const uint8_t *>(src_array->buffers[i]);
5724 8911 : if ((nOffset % 8) != 0)
5725 : {
5726 : // Make sure last byte is fully initialized
5727 2281 : p[nBytes - 1] = 0;
5728 7359 : for (size_t iRow = 0; iRow < nLength; ++iRow)
5729 : {
5730 5078 : if (TestBit(pSrcArray, nOffset + iRow))
5731 4949 : SetBit(p, iRow);
5732 : else
5733 129 : UnsetBit(p, iRow);
5734 : }
5735 : }
5736 : else
5737 : {
5738 6630 : memcpy(p, pSrcArray + nOffset / 8, nBytes);
5739 : }
5740 8911 : out_array->buffers[i] = p;
5741 : }
5742 : }
5743 13360 : else if (i == 1)
5744 : {
5745 11229 : CPLAssert(src_array->buffers[i]);
5746 11229 : size_t nEltSize = 0;
5747 11229 : size_t nExtraElt = 0;
5748 11229 : if (IsUInt8(format) || IsInt8(format))
5749 742 : nEltSize = sizeof(uint8_t);
5750 10487 : else if (IsUInt16(format) || IsInt16(format) || IsFloat16(format))
5751 762 : nEltSize = sizeof(uint16_t);
5752 19430 : else if (IsUInt32(format) || IsInt32(format) || IsFloat32(format) ||
5753 28056 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
5754 8626 : strcmp(format, "ttm") == 0)
5755 : {
5756 1316 : nEltSize = sizeof(uint32_t);
5757 : }
5758 13011 : else if (IsString(format) || IsBinary(format) || IsList(format) ||
5759 4602 : IsMap(format))
5760 : {
5761 4496 : nEltSize = sizeof(uint32_t);
5762 4496 : nExtraElt = 1;
5763 : }
5764 7455 : else if (IsUInt64(format) || IsInt64(format) || IsFloat64(format) ||
5765 1648 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
5766 7455 : strcmp(format, "ttn") == 0 || IsTimestamp(format))
5767 : {
5768 3085 : nEltSize = sizeof(uint64_t);
5769 : }
5770 1318 : else if (IsLargeString(format) || IsLargeBinary(format) ||
5771 490 : IsLargeList(format))
5772 : {
5773 343 : nEltSize = sizeof(uint64_t);
5774 343 : nExtraElt = 1;
5775 : }
5776 485 : else if (IsFixedWidthBinary(format))
5777 : {
5778 111 : nEltSize = GetFixedWithBinary(format);
5779 : }
5780 374 : else if (IsDecimal(format))
5781 : {
5782 374 : int nPrecision = 0;
5783 374 : int nScale = 0;
5784 374 : int nWidthInBytes = 0;
5785 374 : if (!ParseDecimalFormat(format, nPrecision, nScale,
5786 : nWidthInBytes))
5787 : {
5788 0 : CPLError(
5789 : CE_Failure, CPLE_AppDefined,
5790 : "Unexpected error in OGRCloneArrowArray(): unhandled "
5791 : "field format: %s",
5792 : format);
5793 :
5794 0 : return false;
5795 : }
5796 374 : nEltSize = nWidthInBytes;
5797 : }
5798 11229 : if (nEltSize)
5799 : {
5800 11229 : void *p = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
5801 : nLength ? nEltSize * (nLength + nExtraElt) : 1);
5802 11229 : if (!p)
5803 : {
5804 0 : bRet = false;
5805 0 : break;
5806 : }
5807 11229 : if (nLength)
5808 : {
5809 13022 : if ((IsString(format) || IsBinary(format)) &&
5810 1793 : static_cast<const uint32_t *>(
5811 1793 : src_array->buffers[1])[nOffset] != 0)
5812 : {
5813 258 : const auto *CPL_RESTRICT pSrcOffsets =
5814 258 : static_cast<const uint32_t *>(
5815 258 : src_array->buffers[1]) +
5816 : nOffset;
5817 258 : const auto nShiftOffset = pSrcOffsets[0];
5818 258 : auto *CPL_RESTRICT pDstOffsets =
5819 : static_cast<uint32_t *>(p);
5820 1118 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5821 : {
5822 860 : pDstOffsets[iRow] =
5823 860 : pSrcOffsets[iRow] - nShiftOffset;
5824 : }
5825 : }
5826 11309 : else if ((IsLargeString(format) || IsLargeBinary(format)) &&
5827 338 : static_cast<const uint64_t *>(
5828 338 : src_array->buffers[1])[nOffset] != 0)
5829 : {
5830 86 : const auto *CPL_RESTRICT pSrcOffsets =
5831 86 : static_cast<const uint64_t *>(
5832 86 : src_array->buffers[1]) +
5833 : nOffset;
5834 86 : const auto nShiftOffset = pSrcOffsets[0];
5835 86 : auto *CPL_RESTRICT pDstOffsets =
5836 : static_cast<uint64_t *>(p);
5837 344 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5838 : {
5839 258 : pDstOffsets[iRow] =
5840 258 : pSrcOffsets[iRow] - nShiftOffset;
5841 : }
5842 : }
5843 : else
5844 : {
5845 10885 : memcpy(
5846 : p,
5847 10885 : static_cast<const GByte *>(src_array->buffers[i]) +
5848 10885 : nEltSize * nOffset,
5849 10885 : nEltSize * (nLength + nExtraElt));
5850 : }
5851 : }
5852 11229 : out_array->buffers[i] = p;
5853 : }
5854 : else
5855 : {
5856 0 : CPLError(CE_Failure, CPLE_AppDefined,
5857 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5858 : "format = '%s', i = 1",
5859 0 : schema->name, format);
5860 0 : bRet = false;
5861 0 : break;
5862 : }
5863 : }
5864 2131 : else if (i == 2)
5865 : {
5866 2131 : CPLAssert(src_array->buffers[i]);
5867 2131 : size_t nSrcCharOffset = 0;
5868 2131 : size_t nCharCount = 0;
5869 2131 : if (IsString(format) || IsBinary(format))
5870 : {
5871 1793 : const auto *pSrcOffsets =
5872 1793 : static_cast<const uint32_t *>(src_array->buffers[1]) +
5873 : nOffset;
5874 1793 : nSrcCharOffset = pSrcOffsets[0];
5875 1793 : nCharCount = pSrcOffsets[nLength] - pSrcOffsets[0];
5876 : }
5877 338 : else if (IsLargeString(format) || IsLargeBinary(format))
5878 : {
5879 338 : const auto *pSrcOffsets =
5880 338 : static_cast<const uint64_t *>(src_array->buffers[1]) +
5881 : nOffset;
5882 338 : nSrcCharOffset = static_cast<size_t>(pSrcOffsets[0]);
5883 338 : nCharCount =
5884 338 : static_cast<size_t>(pSrcOffsets[nLength] - pSrcOffsets[0]);
5885 : }
5886 : else
5887 : {
5888 0 : CPLError(CE_Failure, CPLE_AppDefined,
5889 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5890 : "format = '%s', i = 2",
5891 0 : schema->name, format);
5892 0 : bRet = false;
5893 0 : break;
5894 : }
5895 : void *p =
5896 2131 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCharCount ? nCharCount : 1);
5897 2131 : if (!p)
5898 : {
5899 0 : bRet = false;
5900 0 : break;
5901 : }
5902 2131 : if (nCharCount)
5903 : {
5904 2131 : memcpy(p,
5905 2131 : static_cast<const GByte *>(src_array->buffers[i]) +
5906 : nSrcCharOffset,
5907 : nCharCount);
5908 : }
5909 2131 : out_array->buffers[i] = p;
5910 : }
5911 : else
5912 : {
5913 0 : CPLError(CE_Failure, CPLE_AppDefined,
5914 : "OGRCloneArrowArray(): unhandled case, array = %s, format "
5915 : "= '%s', i = 3",
5916 0 : schema->name, format);
5917 0 : bRet = false;
5918 0 : break;
5919 : }
5920 : }
5921 :
5922 14093 : if (bRet)
5923 : {
5924 14093 : out_array->n_children = src_array->n_children;
5925 14093 : out_array->children = static_cast<struct ArrowArray **>(
5926 14093 : CPLCalloc(static_cast<size_t>(src_array->n_children),
5927 : sizeof(struct ArrowArray *)));
5928 27945 : for (int64_t i = 0; i < src_array->n_children; ++i)
5929 : {
5930 27704 : out_array->children[i] = static_cast<struct ArrowArray *>(
5931 13852 : CPLCalloc(1, sizeof(struct ArrowArray)));
5932 40215 : if (!OGRCloneArrowArray(schema->children[i], src_array->children[i],
5933 13852 : out_array->children[i],
5934 13852 : IsFixedSizeList(format)
5935 1341 : ? nOffset * GetFixedSizeList(format)
5936 12511 : : IsStructure(format) ? nOffset
5937 : : 0))
5938 : {
5939 0 : bRet = false;
5940 0 : break;
5941 : }
5942 : }
5943 : }
5944 :
5945 14093 : if (bRet && src_array->dictionary)
5946 : {
5947 111 : out_array->dictionary = static_cast<struct ArrowArray *>(
5948 111 : CPLCalloc(1, sizeof(struct ArrowArray)));
5949 111 : bRet = OGRCloneArrowArray(schema->dictionary, src_array->dictionary,
5950 : out_array->dictionary, 0);
5951 : }
5952 :
5953 14093 : if (!bRet)
5954 : {
5955 0 : out_array->release(out_array);
5956 0 : memset(out_array, 0, sizeof(*out_array));
5957 : }
5958 14093 : return bRet;
5959 : }
5960 :
5961 : /** Full/deep copy of an array.
5962 : *
5963 : * Renormalize the offset of the array (and its children) to 0.
5964 : *
5965 : * In case of failure, out_array will be let in a released state.
5966 : *
5967 : * @param schema Schema of the array. Must *NOT* be NULL.
5968 : * @param src_array Source array. Must *NOT* be NULL.
5969 : * @param out_array Output array. Must *NOT* be NULL (but its content may be random)
5970 : * @return true if success.
5971 : */
5972 130 : bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5973 : const struct ArrowArray *src_array,
5974 : struct ArrowArray *out_array)
5975 : {
5976 130 : return OGRCloneArrowArray(schema, src_array, out_array, 0);
5977 : }
5978 :
5979 : /************************************************************************/
5980 : /* OGRCloneArrowMetadata() */
5981 : /************************************************************************/
5982 :
5983 23 : static void *OGRCloneArrowMetadata(const void *pMetadata)
5984 : {
5985 23 : if (!pMetadata)
5986 19 : return nullptr;
5987 4 : std::vector<GByte> abyOut;
5988 4 : const GByte *pabyMetadata = static_cast<const GByte *>(pMetadata);
5989 : int32_t nKVP;
5990 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + sizeof(int32_t));
5991 4 : memcpy(&nKVP, pabyMetadata, sizeof(int32_t));
5992 4 : pabyMetadata += sizeof(int32_t);
5993 8 : for (int i = 0; i < nKVP; ++i)
5994 : {
5995 : int32_t nSizeKey;
5996 0 : abyOut.insert(abyOut.end(), pabyMetadata,
5997 4 : pabyMetadata + sizeof(int32_t));
5998 4 : memcpy(&nSizeKey, pabyMetadata, sizeof(int32_t));
5999 4 : pabyMetadata += sizeof(int32_t);
6000 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeKey);
6001 4 : pabyMetadata += nSizeKey;
6002 :
6003 : int32_t nSizeValue;
6004 0 : abyOut.insert(abyOut.end(), pabyMetadata,
6005 4 : pabyMetadata + sizeof(int32_t));
6006 4 : memcpy(&nSizeValue, pabyMetadata, sizeof(int32_t));
6007 4 : pabyMetadata += sizeof(int32_t);
6008 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeValue);
6009 4 : pabyMetadata += nSizeValue;
6010 : }
6011 :
6012 4 : GByte *pabyOut = static_cast<GByte *>(VSI_MALLOC_VERBOSE(abyOut.size()));
6013 4 : if (pabyOut)
6014 4 : memcpy(pabyOut, abyOut.data(), abyOut.size());
6015 4 : return pabyOut;
6016 : }
6017 :
6018 : /************************************************************************/
6019 : /* OGRCloneArrowSchema() */
6020 : /************************************************************************/
6021 :
6022 : /** Full/deep copy of a schema.
6023 : *
6024 : * In case of failure, out_schema will be let in a released state.
6025 : *
6026 : * @param schema Schema to clone. Must *NOT* be NULL.
6027 : * @param out_schema Output schema. Must *NOT* be NULL (but its content may be random)
6028 : * @return true if success.
6029 : */
6030 23 : bool OGRCloneArrowSchema(const struct ArrowSchema *schema,
6031 : struct ArrowSchema *out_schema)
6032 : {
6033 23 : memset(out_schema, 0, sizeof(*out_schema));
6034 23 : out_schema->release = OGRLayerFullReleaseSchema;
6035 23 : out_schema->format = CPLStrdup(schema->format);
6036 23 : out_schema->name = CPLStrdup(schema->name);
6037 23 : out_schema->metadata = static_cast<const char *>(
6038 23 : const_cast<const void *>(OGRCloneArrowMetadata(schema->metadata)));
6039 23 : out_schema->flags = schema->flags;
6040 23 : if (schema->n_children)
6041 : {
6042 5 : out_schema->children =
6043 5 : static_cast<struct ArrowSchema **>(VSI_CALLOC_VERBOSE(
6044 : static_cast<int>(schema->n_children), sizeof(ArrowSchema *)));
6045 5 : if (!out_schema->children)
6046 : {
6047 0 : out_schema->release(out_schema);
6048 0 : return false;
6049 : }
6050 5 : out_schema->n_children = schema->n_children;
6051 23 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
6052 : {
6053 36 : out_schema->children[i] = static_cast<struct ArrowSchema *>(
6054 18 : CPLMalloc(sizeof(ArrowSchema)));
6055 18 : if (!OGRCloneArrowSchema(schema->children[i],
6056 18 : out_schema->children[i]))
6057 : {
6058 0 : out_schema->release(out_schema);
6059 0 : return false;
6060 : }
6061 : }
6062 : }
6063 23 : if (schema->dictionary)
6064 : {
6065 0 : out_schema->dictionary =
6066 0 : static_cast<struct ArrowSchema *>(CPLMalloc(sizeof(ArrowSchema)));
6067 0 : if (!OGRCloneArrowSchema(schema->dictionary, out_schema->dictionary))
6068 : {
6069 0 : out_schema->release(out_schema);
6070 0 : return false;
6071 : }
6072 : }
6073 23 : return true;
6074 : }
6075 :
6076 : /************************************************************************/
6077 : /* OGRLayer::IsArrowSchemaSupported() */
6078 : /************************************************************************/
6079 :
6080 : const struct
6081 : {
6082 : const char *arrowType;
6083 : OGRFieldType eType;
6084 : OGRFieldSubType eSubType;
6085 : } gasArrowTypesToOGR[] = {
6086 : {"b", OFTInteger, OFSTBoolean}, // Boolean
6087 : {"c", OFTInteger, OFSTInt16}, // Int8
6088 : {"C", OFTInteger, OFSTInt16}, // UInt8
6089 : {"s", OFTInteger, OFSTInt16}, // Int16
6090 : {"S", OFTInteger, OFSTNone}, // UInt16
6091 : {"i", OFTInteger, OFSTNone}, // Int32
6092 : {"I", OFTInteger64, OFSTNone}, // UInt32
6093 : {"l", OFTInteger64, OFSTNone}, // Int64
6094 : {"L", OFTReal,
6095 : OFSTNone}, // UInt64 (potentially lossy conversion if going through OGRFeature)
6096 : {"e", OFTReal, OFSTFloat32}, // float16
6097 : {"f", OFTReal, OFSTFloat32}, // float32
6098 : {"g", OFTReal, OFSTNone}, // float64
6099 : {"z", OFTBinary, OFSTNone}, // binary
6100 : {"Z", OFTBinary,
6101 : OFSTNone}, // large binary (will be limited to 32 bit length though if going through OGRFeature!)
6102 : {"u", OFTString, OFSTNone}, // string
6103 : {"U", OFTString, OFSTNone}, // large string
6104 : {"vu", OFTString, OFSTNone}, // string view
6105 : {"tdD", OFTDate, OFSTNone}, // date32[days]
6106 : {"tdm", OFTDate, OFSTNone}, // date64[milliseconds]
6107 : {"tts", OFTTime, OFSTNone}, // time32 [seconds]
6108 : {"ttm", OFTTime, OFSTNone}, // time32 [milliseconds]
6109 : {"ttu", OFTTime, OFSTNone}, // time64 [microseconds]
6110 : {"ttn", OFTTime, OFSTNone}, // time64 [nanoseconds]
6111 : };
6112 :
6113 : const struct
6114 : {
6115 : const char arrowLetter;
6116 : OGRFieldType eType;
6117 : OGRFieldSubType eSubType;
6118 : } gasListTypes[] = {
6119 : {ARROW_LETTER_BOOLEAN, OFTIntegerList, OFSTBoolean},
6120 : {ARROW_LETTER_INT8, OFTIntegerList, OFSTInt16},
6121 : {ARROW_LETTER_UINT8, OFTIntegerList, OFSTInt16},
6122 : {ARROW_LETTER_INT16, OFTIntegerList, OFSTInt16},
6123 : {ARROW_LETTER_UINT16, OFTIntegerList, OFSTNone},
6124 : {ARROW_LETTER_INT32, OFTIntegerList, OFSTNone},
6125 : {ARROW_LETTER_UINT32, OFTInteger64List, OFSTNone},
6126 : {ARROW_LETTER_INT64, OFTInteger64List, OFSTNone},
6127 : {ARROW_LETTER_UINT64, OFTRealList,
6128 : OFSTNone}, //(potentially lossy conversion if going through OGRFeature)
6129 : {ARROW_LETTER_FLOAT16, OFTRealList, OFSTFloat32},
6130 : {ARROW_LETTER_FLOAT32, OFTRealList, OFSTFloat32},
6131 : {ARROW_LETTER_FLOAT64, OFTRealList, OFSTNone},
6132 : {ARROW_LETTER_STRING, OFTStringList, OFSTNone},
6133 : {ARROW_LETTER_LARGE_STRING, OFTStringList, OFSTNone},
6134 : };
6135 :
6136 43 : static inline bool IsValidDictionaryIndexType(const char *format)
6137 : {
6138 40 : return (format[0] == ARROW_LETTER_INT8 || format[0] == ARROW_LETTER_UINT8 ||
6139 37 : format[0] == ARROW_LETTER_INT16 ||
6140 34 : format[0] == ARROW_LETTER_UINT16 ||
6141 31 : format[0] == ARROW_LETTER_INT32 ||
6142 9 : format[0] == ARROW_LETTER_UINT32 ||
6143 6 : format[0] == ARROW_LETTER_INT64 ||
6144 89 : format[0] == ARROW_LETTER_UINT64) &&
6145 86 : format[1] == 0;
6146 : }
6147 :
6148 230 : static bool IsSupportForJSONObj(const struct ArrowSchema *schema)
6149 : {
6150 230 : const char *format = schema->format;
6151 230 : if (IsStructure(format))
6152 : {
6153 35 : for (int64_t i = 0; i < schema->n_children; ++i)
6154 : {
6155 26 : if (!IsSupportForJSONObj(schema->children[i]))
6156 0 : return false;
6157 : }
6158 9 : return true;
6159 : }
6160 :
6161 2752 : for (const auto &sType : gasListTypes)
6162 : {
6163 2626 : if (format[0] == sType.arrowLetter && format[1] == 0)
6164 : {
6165 95 : return true;
6166 : }
6167 : }
6168 :
6169 366 : if (IsBinary(format) || IsLargeBinary(format) ||
6170 366 : IsFixedWidthBinary(format) || IsStringView(format))
6171 12 : return true;
6172 :
6173 114 : if (IsDecimal(format))
6174 : {
6175 6 : int nPrecision = 0;
6176 6 : int nScale = 0;
6177 6 : int nWidthInBytes = 0;
6178 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6179 : {
6180 0 : CPLError(CE_Failure, CPLE_AppDefined, "Invalid field format %s",
6181 : format);
6182 0 : return false;
6183 : }
6184 :
6185 6 : return GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision) ==
6186 6 : nullptr;
6187 : }
6188 :
6189 108 : if (IsMap(format))
6190 : {
6191 74 : return IsStructure(schema->children[0]->format) &&
6192 148 : schema->children[0]->n_children == 2 &&
6193 222 : IsString(schema->children[0]->children[0]->format) &&
6194 148 : IsSupportForJSONObj(schema->children[0]->children[1]);
6195 : }
6196 :
6197 34 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6198 : {
6199 34 : return IsSupportForJSONObj(schema->children[0]);
6200 : }
6201 :
6202 0 : return false;
6203 : }
6204 :
6205 544 : static bool IsArrowSchemaSupportedInternal(const struct ArrowSchema *schema,
6206 : const std::string &osFieldPrefix,
6207 : std::string &osErrorMsg)
6208 : {
6209 0 : const auto AppendError = [&osErrorMsg](const std::string &osMsg)
6210 : {
6211 0 : if (!osErrorMsg.empty())
6212 0 : osErrorMsg += " ";
6213 0 : osErrorMsg += osMsg;
6214 544 : };
6215 :
6216 544 : const char *fieldName = schema->name;
6217 544 : const char *format = schema->format;
6218 544 : if (IsStructure(format))
6219 : {
6220 5 : bool bRet = true;
6221 5 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6222 21 : for (int64_t i = 0; i < schema->n_children; ++i)
6223 : {
6224 16 : if (!IsArrowSchemaSupportedInternal(schema->children[i],
6225 : osNewPrefix, osErrorMsg))
6226 0 : bRet = false;
6227 : }
6228 5 : return bRet;
6229 : }
6230 :
6231 539 : if (schema->dictionary)
6232 : {
6233 15 : if (!IsValidDictionaryIndexType(format))
6234 : {
6235 0 : AppendError("Dictionary only supported if the parent is of "
6236 : "type [U]Int[8|16|32|64]");
6237 0 : return false;
6238 : }
6239 :
6240 15 : schema = schema->dictionary;
6241 15 : format = schema->format;
6242 : }
6243 :
6244 539 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6245 : {
6246 : // Only some subtypes supported
6247 132 : const char *childFormat = schema->children[0]->format;
6248 1103 : for (const auto &sType : gasListTypes)
6249 : {
6250 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
6251 : {
6252 117 : return true;
6253 : }
6254 : }
6255 15 : if (IsStringView(childFormat))
6256 0 : return true;
6257 :
6258 15 : if (IsDecimal(childFormat))
6259 : {
6260 7 : int nPrecision = 0;
6261 7 : int nScale = 0;
6262 7 : int nWidthInBytes = 0;
6263 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
6264 : nWidthInBytes))
6265 : {
6266 0 : AppendError(std::string("Invalid field format ") + childFormat +
6267 0 : " for field " + osFieldPrefix + fieldName);
6268 0 : return false;
6269 : }
6270 :
6271 : const char *pszError =
6272 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6273 7 : if (pszError)
6274 : {
6275 0 : AppendError(pszError);
6276 0 : return false;
6277 : }
6278 :
6279 7 : return true;
6280 : }
6281 :
6282 8 : if (IsSupportForJSONObj(schema))
6283 : {
6284 8 : return true;
6285 : }
6286 :
6287 0 : AppendError("Type list for field " + osFieldPrefix + fieldName +
6288 : " is not supported.");
6289 0 : return false;
6290 : }
6291 :
6292 407 : else if (IsMap(format))
6293 : {
6294 70 : if (IsSupportForJSONObj(schema))
6295 70 : return true;
6296 :
6297 0 : AppendError("Type map for field " + osFieldPrefix + fieldName +
6298 : " is not supported.");
6299 0 : return false;
6300 : }
6301 337 : else if (IsDecimal(format))
6302 : {
6303 6 : int nPrecision = 0;
6304 6 : int nScale = 0;
6305 6 : int nWidthInBytes = 0;
6306 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6307 : {
6308 0 : AppendError(std::string("Invalid field format ") + format +
6309 0 : " for field " + osFieldPrefix + fieldName);
6310 0 : return false;
6311 : }
6312 :
6313 : const char *pszError =
6314 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6315 6 : if (pszError)
6316 : {
6317 0 : AppendError(pszError);
6318 0 : return false;
6319 : }
6320 :
6321 6 : return true;
6322 : }
6323 : else
6324 : {
6325 4277 : for (const auto &sType : gasArrowTypesToOGR)
6326 : {
6327 4257 : if (strcmp(format, sType.arrowType) == 0)
6328 : {
6329 311 : return true;
6330 : }
6331 : }
6332 :
6333 20 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
6334 20 : return true;
6335 :
6336 0 : AppendError("Type '" + std::string(format) + "' for field " +
6337 0 : osFieldPrefix + fieldName + " is not supported.");
6338 0 : return false;
6339 : }
6340 : }
6341 :
6342 : /** Returns whether the provided ArrowSchema is supported for writing.
6343 : *
6344 : * This method exists since not all drivers may support all Arrow data types.
6345 : *
6346 : * The ArrowSchema must be of type struct (format=+s)
6347 : *
6348 : * It is recommended to call this method before calling WriteArrowBatch().
6349 : *
6350 : * This is the same as the C function OGR_L_IsArrowSchemaSupported().
6351 : *
6352 : * @param schema Schema of type struct (format = '+s')
6353 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6354 : * @param[out] osErrorMsg Reason of the failure, when this method returns false.
6355 : * @return true if the ArrowSchema is supported for writing.
6356 : * @since 3.8
6357 : */
6358 59 : bool OGRLayer::IsArrowSchemaSupported(const struct ArrowSchema *schema,
6359 : CPL_UNUSED CSLConstList papszOptions,
6360 : std::string &osErrorMsg) const
6361 : {
6362 59 : if (!IsStructure(schema->format))
6363 : {
6364 : osErrorMsg =
6365 : "IsArrowSchemaSupported() should be called on a schema that is a "
6366 1 : "struct of fields";
6367 1 : return false;
6368 : }
6369 :
6370 58 : bool bRet = true;
6371 586 : for (int64_t i = 0; i < schema->n_children; ++i)
6372 : {
6373 528 : if (!IsArrowSchemaSupportedInternal(schema->children[i], std::string(),
6374 : osErrorMsg))
6375 0 : bRet = false;
6376 : }
6377 58 : return bRet;
6378 : }
6379 :
6380 : /************************************************************************/
6381 : /* OGR_L_IsArrowSchemaSupported() */
6382 : /************************************************************************/
6383 :
6384 : /** Returns whether the provided ArrowSchema is supported for writing.
6385 : *
6386 : * This function exists since not all drivers may support all Arrow data types.
6387 : *
6388 : * The ArrowSchema must be of type struct (format=+s)
6389 : *
6390 : * It is recommended to call this function before calling OGR_L_WriteArrowBatch().
6391 : *
6392 : * This is the same as the C++ method OGRLayer::IsArrowSchemaSupported().
6393 : *
6394 : * @param hLayer Layer.
6395 : * @param schema Schema of type struct (format = '+s')
6396 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6397 : * @param[out] ppszErrorMsg nullptr, or pointer to a string that will contain
6398 : * the reason of the failure, when this function returns false.
6399 : * @return true if the ArrowSchema is supported for writing.
6400 : * @since 3.8
6401 : */
6402 19 : bool OGR_L_IsArrowSchemaSupported(OGRLayerH hLayer,
6403 : const struct ArrowSchema *schema,
6404 : CSLConstList papszOptions,
6405 : char **ppszErrorMsg)
6406 : {
6407 19 : VALIDATE_POINTER1(hLayer, __func__, false);
6408 19 : VALIDATE_POINTER1(schema, __func__, false);
6409 :
6410 38 : std::string osErrorMsg;
6411 38 : if (!OGRLayer::FromHandle(hLayer)->IsArrowSchemaSupported(
6412 19 : schema, papszOptions, osErrorMsg))
6413 : {
6414 4 : if (ppszErrorMsg)
6415 4 : *ppszErrorMsg = VSIStrdup(osErrorMsg.c_str());
6416 4 : return false;
6417 : }
6418 : else
6419 : {
6420 15 : if (ppszErrorMsg)
6421 15 : *ppszErrorMsg = nullptr;
6422 15 : return true;
6423 : }
6424 : }
6425 :
6426 : /************************************************************************/
6427 : /* IsKnownCodedFieldDomain() */
6428 : /************************************************************************/
6429 :
6430 34 : static bool IsKnownCodedFieldDomain(OGRLayer *poLayer,
6431 : const char *arrowMetadata)
6432 : {
6433 34 : if (arrowMetadata)
6434 : {
6435 6 : const auto oMetadata = OGRParseArrowMetadata(arrowMetadata);
6436 6 : for (const auto &oIter : oMetadata)
6437 : {
6438 6 : if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6439 : {
6440 6 : auto poDS = poLayer->GetDataset();
6441 6 : if (poDS)
6442 : {
6443 : const auto poFieldDomain =
6444 6 : poDS->GetFieldDomain(oIter.second);
6445 12 : if (poFieldDomain &&
6446 6 : poFieldDomain->GetDomainType() == OFDT_CODED)
6447 : {
6448 6 : return true;
6449 : }
6450 : }
6451 : }
6452 : }
6453 : }
6454 28 : return false;
6455 : }
6456 :
6457 : /************************************************************************/
6458 : /* OGRLayer::CreateFieldFromArrowSchema() */
6459 : /************************************************************************/
6460 :
6461 : //! @cond Doxygen_Suppress
6462 472 : bool OGRLayer::CreateFieldFromArrowSchemaInternal(
6463 : const struct ArrowSchema *schema, const std::string &osFieldPrefix,
6464 : CSLConstList papszOptions)
6465 : {
6466 472 : const char *fieldName = schema->name;
6467 472 : const char *format = schema->format;
6468 472 : if (IsStructure(format))
6469 : {
6470 5 : if (IsArrowTimeStampWithOffsetField(schema))
6471 : {
6472 0 : OGRFieldDefn oFieldDefn((osFieldPrefix + fieldName).c_str(),
6473 0 : OFTDateTime);
6474 0 : oFieldDefn.SetTZFlag(OGR_TZFLAG_MIXED_TZ);
6475 0 : auto poLayerDefn = GetLayerDefn();
6476 0 : const int nFieldCountBefore = poLayerDefn->GetFieldCount();
6477 0 : if (CreateField(&oFieldDefn) != OGRERR_NONE ||
6478 0 : nFieldCountBefore + 1 != poLayerDefn->GetFieldCount())
6479 : {
6480 0 : return false;
6481 : }
6482 : }
6483 : else
6484 : {
6485 5 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6486 21 : for (int64_t i = 0; i < schema->n_children; ++i)
6487 : {
6488 16 : if (!CreateFieldFromArrowSchemaInternal(
6489 16 : schema->children[i], osNewPrefix, papszOptions))
6490 0 : return false;
6491 : }
6492 : }
6493 5 : return true;
6494 : }
6495 :
6496 934 : CPLStringList aosNativeTypes;
6497 467 : auto poLayer = const_cast<OGRLayer *>(this);
6498 467 : auto poDS = poLayer->GetDataset();
6499 467 : if (poDS)
6500 : {
6501 467 : auto poDriver = poDS->GetDriver();
6502 467 : if (poDriver)
6503 : {
6504 : const char *pszMetadataItem =
6505 467 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
6506 467 : if (pszMetadataItem)
6507 467 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
6508 : }
6509 : }
6510 :
6511 482 : if (schema->dictionary &&
6512 15 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6513 : {
6514 13 : if (!IsValidDictionaryIndexType(format))
6515 : {
6516 0 : CPLError(CE_Failure, CPLE_NotSupported,
6517 : "Dictionary only supported if the parent is of "
6518 : "type [U]Int[8|16|32|64]");
6519 0 : return false;
6520 : }
6521 :
6522 13 : schema = schema->dictionary;
6523 13 : format = schema->format;
6524 : }
6525 :
6526 467 : const auto AddField = [this, schema, fieldName, &aosNativeTypes,
6527 : &osFieldPrefix, poDS](OGRFieldType eTypeIn,
6528 : OGRFieldSubType eSubTypeIn,
6529 3305 : int nWidth, int nPrecision)
6530 : {
6531 467 : const char *pszTypeName = OGRFieldDefn::GetFieldTypeName(eTypeIn);
6532 467 : auto eTypeOut = eTypeIn;
6533 467 : auto eSubTypeOut = eSubTypeIn;
6534 934 : if (!aosNativeTypes.empty() &&
6535 467 : aosNativeTypes.FindString(pszTypeName) < 0)
6536 : {
6537 20 : eTypeOut = OFTString;
6538 20 : eSubTypeOut =
6539 15 : (eTypeIn == OFTIntegerList || eTypeIn == OFTInteger64List ||
6540 8 : eTypeIn == OFTRealList || eTypeIn == OFTStringList)
6541 35 : ? OFSTJSON
6542 : : OFSTNone;
6543 : }
6544 :
6545 934 : const std::string osWantedOGRFieldName = osFieldPrefix + fieldName;
6546 934 : OGRFieldDefn oFieldDefn(osWantedOGRFieldName.c_str(), eTypeOut);
6547 467 : oFieldDefn.SetSubType(eSubTypeOut);
6548 467 : if (eTypeOut == eTypeIn && eSubTypeOut == eSubTypeIn)
6549 : {
6550 447 : oFieldDefn.SetWidth(nWidth);
6551 447 : oFieldDefn.SetPrecision(nPrecision);
6552 : }
6553 467 : oFieldDefn.SetNullable((schema->flags & ARROW_FLAG_NULLABLE) != 0);
6554 :
6555 467 : if (schema->metadata)
6556 : {
6557 62 : const auto oMetadata = OGRParseArrowMetadata(schema->metadata);
6558 63 : for (const auto &oIter : oMetadata)
6559 : {
6560 32 : if (oIter.first == MD_GDAL_OGR_TYPE)
6561 : {
6562 4 : const auto &osType = oIter.second;
6563 48 : for (auto eType = OFTInteger; eType <= OFTMaxType;)
6564 : {
6565 48 : if (OGRFieldDefn::GetFieldTypeName(eType) == osType)
6566 : {
6567 4 : oFieldDefn.SetType(eType);
6568 4 : break;
6569 : }
6570 44 : if (eType == OFTMaxType)
6571 0 : break;
6572 : else
6573 44 : eType = static_cast<OGRFieldType>(eType + 1);
6574 : }
6575 : }
6576 28 : else if (oIter.first == MD_GDAL_OGR_ALTERNATIVE_NAME)
6577 2 : oFieldDefn.SetAlternativeName(oIter.second.c_str());
6578 26 : else if (oIter.first == MD_GDAL_OGR_COMMENT)
6579 2 : oFieldDefn.SetComment(oIter.second);
6580 24 : else if (oIter.first == MD_GDAL_OGR_DEFAULT)
6581 2 : oFieldDefn.SetDefault(oIter.second.c_str());
6582 22 : else if (oIter.first == MD_GDAL_OGR_SUBTYPE)
6583 : {
6584 5 : if (eTypeIn == eTypeOut)
6585 : {
6586 4 : const auto &osSubType = oIter.second;
6587 4 : for (auto eSubType = OFSTNone;
6588 15 : eSubType <= OFSTMaxSubType;)
6589 : {
6590 15 : if (OGRFieldDefn::GetFieldSubTypeName(eSubType) ==
6591 : osSubType)
6592 : {
6593 4 : oFieldDefn.SetSubType(eSubType);
6594 4 : break;
6595 : }
6596 11 : if (eSubType == OFSTMaxSubType)
6597 0 : break;
6598 : else
6599 11 : eSubType =
6600 11 : static_cast<OGRFieldSubType>(eSubType + 1);
6601 : }
6602 : }
6603 : }
6604 17 : else if (oIter.first == MD_GDAL_OGR_WIDTH)
6605 6 : oFieldDefn.SetWidth(atoi(oIter.second.c_str()));
6606 11 : else if (oIter.first == MD_GDAL_OGR_UNIQUE)
6607 2 : oFieldDefn.SetUnique(oIter.second == "true");
6608 9 : else if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6609 : {
6610 2 : if (poDS && poDS->GetFieldDomain(oIter.second))
6611 2 : oFieldDefn.SetDomainName(oIter.second);
6612 : }
6613 13 : else if (oIter.first == ARROW_EXTENSION_NAME_KEY &&
6614 6 : (oIter.second == EXTENSION_NAME_ARROW_JSON ||
6615 : // Used by BigQuery through ADBC driver
6616 0 : oIter.second == "google:sqlType:json"))
6617 : {
6618 6 : oFieldDefn.SetSubType(OFSTJSON);
6619 : }
6620 1 : else if (oIter.first == ARROW_EXTENSION_NAME_KEY)
6621 : {
6622 0 : CPLDebug("OGR", "Unknown Arrow extension: %s",
6623 : oIter.second.c_str());
6624 : }
6625 : else
6626 : {
6627 1 : CPLDebug("OGR", "Unknown field metadata: %s",
6628 : oIter.first.c_str());
6629 : }
6630 : }
6631 : }
6632 467 : auto poLayerDefn = GetLayerDefn();
6633 467 : const int nFieldCountBefore = poLayerDefn->GetFieldCount();
6634 934 : if (CreateField(&oFieldDefn) != OGRERR_NONE ||
6635 467 : nFieldCountBefore + 1 != poLayerDefn->GetFieldCount())
6636 : {
6637 0 : return false;
6638 : }
6639 : const char *pszActualFieldName =
6640 467 : poLayerDefn->GetFieldDefn(nFieldCountBefore)->GetNameRef();
6641 467 : if (pszActualFieldName != osWantedOGRFieldName)
6642 : {
6643 : m_poPrivate
6644 1 : ->m_oMapArrowFieldNameToOGRFieldName[osWantedOGRFieldName] =
6645 1 : pszActualFieldName;
6646 : }
6647 467 : return true;
6648 467 : };
6649 :
6650 8437 : for (const auto &sType : gasArrowTypesToOGR)
6651 : {
6652 8206 : if (strcmp(format, sType.arrowType) == 0)
6653 : {
6654 236 : return AddField(sType.eType, sType.eSubType, 0, 0);
6655 : }
6656 : }
6657 :
6658 231 : if (IsMap(format))
6659 : {
6660 70 : return AddField(OFTString, OFSTJSON, 0, 0);
6661 : }
6662 :
6663 161 : if (IsTimestamp(format))
6664 : {
6665 20 : return AddField(OFTDateTime, OFSTNone, 0, 0);
6666 : }
6667 :
6668 141 : if (IsFixedWidthBinary(format))
6669 : {
6670 3 : return AddField(OFTBinary, OFSTNone, GetFixedWithBinary(format), 0);
6671 : }
6672 :
6673 138 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6674 : {
6675 132 : const char *childFormat = schema->children[0]->format;
6676 1103 : for (const auto &sType : gasListTypes)
6677 : {
6678 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
6679 : {
6680 117 : return AddField(sType.eType, sType.eSubType, 0, 0);
6681 : }
6682 : }
6683 :
6684 15 : if (IsStringView(childFormat))
6685 : {
6686 0 : return AddField(OFTStringList, OFSTNone, 0, 0);
6687 : }
6688 :
6689 15 : if (IsDecimal(childFormat))
6690 : {
6691 7 : int nPrecision = 0;
6692 7 : int nScale = 0;
6693 7 : int nWidthInBytes = 0;
6694 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
6695 : nWidthInBytes))
6696 : {
6697 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6698 0 : (std::string("Invalid field format ") + format +
6699 0 : " for field " + osFieldPrefix + fieldName)
6700 : .c_str());
6701 0 : return false;
6702 : }
6703 :
6704 : const char *pszError =
6705 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6706 7 : if (pszError)
6707 : {
6708 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6709 0 : return false;
6710 : }
6711 :
6712 : // DBF convention: add space for negative sign and decimal separator
6713 7 : return AddField(OFTRealList, OFSTNone, nPrecision + 2, nScale);
6714 : }
6715 :
6716 8 : if (IsSupportForJSONObj(schema->children[0]))
6717 : {
6718 8 : return AddField(OFTString, OFSTJSON, 0, 0);
6719 : }
6720 :
6721 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6722 0 : ("List of type '" + std::string(childFormat) + "' for field " +
6723 0 : osFieldPrefix + fieldName + " is not supported.")
6724 : .c_str());
6725 0 : return false;
6726 : }
6727 :
6728 6 : if (IsDecimal(format))
6729 : {
6730 6 : int nPrecision = 0;
6731 6 : int nScale = 0;
6732 6 : int nWidthInBytes = 0;
6733 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6734 : {
6735 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6736 0 : (std::string("Invalid field format ") + format +
6737 0 : " for field " + osFieldPrefix + fieldName)
6738 : .c_str());
6739 0 : return false;
6740 : }
6741 :
6742 : const char *pszError =
6743 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6744 6 : if (pszError)
6745 : {
6746 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6747 0 : return false;
6748 : }
6749 :
6750 : // DBF convention: add space for negative sign and decimal separator
6751 6 : return AddField(OFTReal, OFSTNone, nPrecision + 2, nScale);
6752 : }
6753 :
6754 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6755 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
6756 0 : fieldName + " is not supported.")
6757 : .c_str());
6758 0 : return false;
6759 : }
6760 :
6761 : //! @endcond
6762 :
6763 : /** Creates a field from an ArrowSchema.
6764 : *
6765 : * This should only be used for attribute fields. Geometry fields should
6766 : * be created with CreateGeomField(). The FID field should also not be
6767 : * passed with this method.
6768 : *
6769 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6770 : * passed schema must be for an individual field, and thus, is *not* of type
6771 : * struct (format=+s) (unless writing a set of fields grouped together in the
6772 : * same structure).
6773 : *
6774 : * Additional field metadata can be specified through the ArrowSchema::metadata
6775 : * field with the potential following items:
6776 : * <ul>
6777 : * <li>"GDAL:OGR:alternative_name": value of
6778 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6779 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6780 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6781 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6782 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6783 : * string)</li>
6784 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6785 : * "true" or "false")</li>
6786 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6787 : * </ul>
6788 : *
6789 : * This method and CreateField() are mutually exclusive in the same session.
6790 : *
6791 : * This method is the same as the C function OGR_L_CreateFieldFromArrowSchema().
6792 : *
6793 : * @param schema Schema of the field to create.
6794 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6795 : * @return true in case of success
6796 : * @since 3.8
6797 : */
6798 456 : bool OGRLayer::CreateFieldFromArrowSchema(const struct ArrowSchema *schema,
6799 : CSLConstList papszOptions)
6800 : {
6801 912 : return CreateFieldFromArrowSchemaInternal(schema, std::string(),
6802 912 : papszOptions);
6803 : }
6804 :
6805 : /************************************************************************/
6806 : /* OGR_L_CreateFieldFromArrowSchema() */
6807 : /************************************************************************/
6808 :
6809 : /** Creates a field from an ArrowSchema.
6810 : *
6811 : * This should only be used for attribute fields. Geometry fields should
6812 : * be created with CreateGeomField(). The FID field should also not be
6813 : * passed with this method.
6814 : *
6815 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6816 : * passed schema must be for an individual field, and thus, is *not* of type
6817 : * struct (format=+s) (unless writing a set of fields grouped together in the
6818 : * same structure).
6819 : *
6820 : * Additional field metadata can be specified through the ArrowSchema::metadata
6821 : * field with the potential following items:
6822 : * <ul>
6823 : * <li>"GDAL:OGR:alternative_name": value of
6824 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6825 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6826 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6827 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6828 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6829 : * string)</li>
6830 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6831 : * "true" or "false")</li>
6832 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6833 : * </ul>
6834 : *
6835 : * This method and CreateField() are mutually exclusive in the same session.
6836 : *
6837 : * This method is the same as the C++ method OGRLayer::CreateFieldFromArrowSchema().
6838 : *
6839 : * @param hLayer Layer.
6840 : * @param schema Schema of the field to create.
6841 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6842 : * @return true in case of success
6843 : * @since 3.8
6844 : */
6845 541 : bool OGR_L_CreateFieldFromArrowSchema(OGRLayerH hLayer,
6846 : const struct ArrowSchema *schema,
6847 : CSLConstList papszOptions)
6848 : {
6849 541 : VALIDATE_POINTER1(hLayer, __func__, false);
6850 541 : VALIDATE_POINTER1(schema, __func__, false);
6851 :
6852 1082 : return OGRLayer::FromHandle(hLayer)->CreateFieldFromArrowSchema(
6853 541 : schema, papszOptions);
6854 : }
6855 :
6856 : /************************************************************************/
6857 : /* BuildOGRFieldInfo() */
6858 : /************************************************************************/
6859 :
6860 : constexpr int FID_COLUMN_SPECIAL_OGR_FIELD_IDX = -2;
6861 :
6862 : struct FieldInfo
6863 : {
6864 : std::string osName{};
6865 : int iOGRFieldIdx = -1;
6866 : const char *format = nullptr;
6867 : OGRFieldType eNominalFieldType =
6868 : OFTMaxType; // OGR data type that would best match the Arrow type
6869 : OGRFieldType eTargetFieldType =
6870 : OFTMaxType; // actual OGR data type of the layer field
6871 : // OGR data type of the feature passed to FillFeature()
6872 : OGRFieldType eSetFeatureFieldType = OFTMaxType;
6873 : bool bIsGeomCol = false;
6874 : bool bUseDictionary = false;
6875 : bool bUseStringOptim = false;
6876 : int nWidthInBytes = 0; // only used for decimal fields
6877 : int nPrecision = 0; // only used for decimal fields
6878 : int nScale = 0; // only used for decimal fields
6879 : };
6880 :
6881 779 : static bool BuildOGRFieldInfo(
6882 : const struct ArrowSchema *schema, struct ArrowArray *array,
6883 : const OGRFeatureDefn *poFeatureDefn, const std::string &osFieldPrefix,
6884 : const CPLStringList &aosNativeTypes, bool &bFallbackTypesUsed,
6885 : std::vector<FieldInfo> &asFieldInfo, const char *pszFIDName,
6886 : const char *pszGeomFieldName, OGRLayer *poLayer,
6887 : const std::map<std::string, std::string> &oMapArrowFieldNameToOGRFieldName,
6888 : const struct ArrowSchema *&schemaFIDColumn,
6889 : struct ArrowArray *&arrayFIDColumn)
6890 : {
6891 779 : const char *fieldName = schema->name;
6892 779 : const char *format = schema->format;
6893 779 : if (IsStructure(format))
6894 : {
6895 9 : if (IsArrowTimeStampWithOffsetField(schema))
6896 : {
6897 0 : FieldInfo sInfo;
6898 0 : sInfo.osName = fieldName;
6899 0 : sInfo.format = "+s";
6900 0 : sInfo.eNominalFieldType = OFTDateTime;
6901 : const std::string &osExpectedOGRFieldName =
6902 0 : [&oMapArrowFieldNameToOGRFieldName,
6903 0 : &sInfo]() -> const std::string &
6904 : {
6905 : const auto oIter =
6906 0 : oMapArrowFieldNameToOGRFieldName.find(sInfo.osName);
6907 0 : if (oIter != oMapArrowFieldNameToOGRFieldName.end())
6908 0 : return oIter->second;
6909 0 : return sInfo.osName;
6910 0 : }();
6911 0 : sInfo.iOGRFieldIdx =
6912 0 : poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str());
6913 0 : if (sInfo.iOGRFieldIdx >= 0)
6914 : {
6915 : const auto eOGRType =
6916 0 : poFeatureDefn->GetFieldDefn(sInfo.iOGRFieldIdx)->GetType();
6917 0 : sInfo.eTargetFieldType = eOGRType;
6918 : }
6919 0 : asFieldInfo.emplace_back(std::move(sInfo));
6920 : }
6921 : else
6922 : {
6923 9 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6924 39 : for (int64_t i = 0; i < array->n_children; ++i)
6925 : {
6926 30 : if (!BuildOGRFieldInfo(
6927 30 : schema->children[i], array->children[i], poFeatureDefn,
6928 : osNewPrefix, aosNativeTypes, bFallbackTypesUsed,
6929 : asFieldInfo, pszFIDName, pszGeomFieldName, poLayer,
6930 : oMapArrowFieldNameToOGRFieldName, schemaFIDColumn,
6931 : arrayFIDColumn))
6932 : {
6933 0 : return false;
6934 : }
6935 : }
6936 : }
6937 9 : return true;
6938 : }
6939 :
6940 1540 : FieldInfo sInfo;
6941 :
6942 789 : if (schema->dictionary &&
6943 19 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6944 : {
6945 15 : if (!IsValidDictionaryIndexType(format))
6946 : {
6947 0 : CPLError(CE_Failure, CPLE_NotSupported,
6948 : "Dictionary only supported if the parent is of "
6949 : "type [U]Int[8|16|32|64]");
6950 0 : return false;
6951 : }
6952 :
6953 15 : sInfo.bUseDictionary = true;
6954 15 : schema = schema->dictionary;
6955 15 : format = schema->format;
6956 15 : array = array->dictionary;
6957 : }
6958 :
6959 770 : sInfo.osName = osFieldPrefix + fieldName;
6960 770 : sInfo.format = format;
6961 770 : if (pszFIDName && sInfo.osName == pszFIDName)
6962 : {
6963 35 : if (IsInt32(format) || IsInt64(format))
6964 : {
6965 34 : sInfo.iOGRFieldIdx = FID_COLUMN_SPECIAL_OGR_FIELD_IDX;
6966 34 : schemaFIDColumn = schema;
6967 34 : arrayFIDColumn = array;
6968 : }
6969 : else
6970 : {
6971 1 : CPLError(CE_Failure, CPLE_AppDefined,
6972 : "FID column '%s' should be of Arrow format 'i' "
6973 : "(int32) or 'l' (int64)",
6974 : sInfo.osName.c_str());
6975 1 : return false;
6976 : }
6977 : }
6978 : else
6979 : {
6980 : const std::string &osExpectedOGRFieldName =
6981 2204 : [&oMapArrowFieldNameToOGRFieldName, &sInfo]() -> const std::string &
6982 : {
6983 : const auto oIter =
6984 735 : oMapArrowFieldNameToOGRFieldName.find(sInfo.osName);
6985 735 : if (oIter != oMapArrowFieldNameToOGRFieldName.end())
6986 1 : return oIter->second;
6987 734 : return sInfo.osName;
6988 735 : }();
6989 735 : sInfo.iOGRFieldIdx =
6990 735 : poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str());
6991 735 : if (sInfo.iOGRFieldIdx >= 0)
6992 : {
6993 655 : bool bTypeOK = false;
6994 : const auto eOGRType =
6995 655 : poFeatureDefn->GetFieldDefn(sInfo.iOGRFieldIdx)->GetType();
6996 655 : sInfo.eTargetFieldType = eOGRType;
6997 12083 : for (const auto &sType : gasArrowTypesToOGR)
6998 : {
6999 11740 : if (strcmp(format, sType.arrowType) == 0)
7000 : {
7001 312 : sInfo.bUseStringOptim = sType.eType == OFTString;
7002 312 : sInfo.eNominalFieldType = sType.eType;
7003 312 : if (eOGRType == sInfo.eNominalFieldType)
7004 : {
7005 281 : bTypeOK = true;
7006 281 : break;
7007 : }
7008 31 : else if (eOGRType == OFTString)
7009 : {
7010 4 : bFallbackTypesUsed = true;
7011 4 : bTypeOK = true;
7012 4 : break;
7013 : }
7014 27 : else if (eOGRType == OFTInteger &&
7015 10 : sType.eType == OFTInteger64)
7016 : {
7017 : // Potentially lossy.
7018 4 : CPLDebug("OGR",
7019 : "For field %s, writing from Arrow array of "
7020 : "type Int64 into OGR Int32 field. "
7021 : "Potentially loss conversion can happen",
7022 : sInfo.osName.c_str());
7023 4 : bFallbackTypesUsed = true;
7024 4 : bTypeOK = true;
7025 4 : break;
7026 : }
7027 23 : else if (eOGRType == OFTInteger && sType.eType == OFTReal)
7028 : {
7029 : // Potentially lossy.
7030 6 : CPLDebug("OGR",
7031 : "For field %s, writing from Arrow array of "
7032 : "type Real into OGR Int32 field. "
7033 : "Potentially loss conversion can happen",
7034 : sInfo.osName.c_str());
7035 6 : bFallbackTypesUsed = true;
7036 6 : bTypeOK = true;
7037 6 : break;
7038 : }
7039 17 : else if (eOGRType == OFTInteger64 && sType.eType == OFTReal)
7040 : {
7041 : // Potentially lossy.
7042 6 : CPLDebug("OGR",
7043 : "For field %s, writing from Arrow array of "
7044 : "type Real into OGR Int64 field. "
7045 : "Potentially loss conversion can happen",
7046 : sInfo.osName.c_str());
7047 6 : bFallbackTypesUsed = true;
7048 6 : bTypeOK = true;
7049 6 : break;
7050 : }
7051 11 : else if (eOGRType == OFTReal && sType.eType == OFTInteger64)
7052 : {
7053 : // Potentially lossy.
7054 4 : CPLDebug("OGR",
7055 : "For field %s, writing from Arrow array of "
7056 : "type Int64 into OGR Real field. "
7057 : "Potentially loss conversion can happen",
7058 : sInfo.osName.c_str());
7059 4 : bFallbackTypesUsed = true;
7060 4 : bTypeOK = true;
7061 4 : break;
7062 : }
7063 7 : else if ((eOGRType == OFTInteger64 ||
7064 4 : eOGRType == OFTReal) &&
7065 4 : sType.eType == OFTInteger)
7066 : {
7067 : // Non-lossy
7068 4 : bFallbackTypesUsed = true;
7069 4 : bTypeOK = true;
7070 4 : break;
7071 : }
7072 3 : else if (eOGRType == OFTDateTime &&
7073 3 : sType.eType == OFTString)
7074 : {
7075 3 : bFallbackTypesUsed = true;
7076 3 : bTypeOK = true;
7077 3 : break;
7078 : }
7079 : else
7080 : {
7081 0 : CPLError(CE_Failure, CPLE_AppDefined,
7082 : "For field %s, OGR field type is %s whereas "
7083 : "Arrow type implies %s",
7084 : sInfo.osName.c_str(),
7085 : OGR_GetFieldTypeName(eOGRType),
7086 0 : OGR_GetFieldTypeName(sType.eType));
7087 0 : return false;
7088 : }
7089 : }
7090 : }
7091 :
7092 655 : if (!bTypeOK && IsMap(format))
7093 : {
7094 106 : sInfo.eNominalFieldType = OFTString;
7095 106 : if (eOGRType == sInfo.eNominalFieldType)
7096 : {
7097 106 : bTypeOK = true;
7098 : }
7099 : else
7100 : {
7101 0 : CPLError(CE_Failure, CPLE_AppDefined,
7102 : "For field %s, OGR field type is %s whereas "
7103 : "Arrow type implies %s",
7104 : sInfo.osName.c_str(),
7105 : OGR_GetFieldTypeName(eOGRType),
7106 : OGR_GetFieldTypeName(OFTString));
7107 0 : return false;
7108 : }
7109 : }
7110 :
7111 655 : if (!bTypeOK && IsTimestamp(format))
7112 : {
7113 32 : sInfo.eNominalFieldType = OFTDateTime;
7114 32 : if (eOGRType == sInfo.eNominalFieldType)
7115 : {
7116 31 : bTypeOK = true;
7117 : }
7118 1 : else if (eOGRType == OFTString)
7119 : {
7120 1 : bFallbackTypesUsed = true;
7121 1 : bTypeOK = true;
7122 : }
7123 : else
7124 : {
7125 0 : CPLError(CE_Failure, CPLE_AppDefined,
7126 : "For field %s, OGR field type is %s whereas "
7127 : "Arrow type implies %s",
7128 : sInfo.osName.c_str(),
7129 : OGR_GetFieldTypeName(eOGRType),
7130 : OGR_GetFieldTypeName(OFTDateTime));
7131 0 : return false;
7132 : }
7133 : }
7134 :
7135 655 : if (!bTypeOK && IsFixedWidthBinary(format))
7136 : {
7137 5 : sInfo.eNominalFieldType = OFTBinary;
7138 5 : if (eOGRType == sInfo.eNominalFieldType)
7139 : {
7140 5 : bTypeOK = true;
7141 : }
7142 0 : else if (eOGRType == OFTString)
7143 : {
7144 0 : bFallbackTypesUsed = true;
7145 0 : bTypeOK = true;
7146 : }
7147 : else
7148 : {
7149 0 : CPLError(CE_Failure, CPLE_AppDefined,
7150 : "For field %s, OGR field type is %s whereas "
7151 : "Arrow type implies %s",
7152 : sInfo.osName.c_str(),
7153 : OGR_GetFieldTypeName(eOGRType),
7154 : OGR_GetFieldTypeName(OFTBinary));
7155 0 : return false;
7156 : }
7157 : }
7158 :
7159 728 : if (!bTypeOK && (IsList(format) || IsLargeList(format) ||
7160 73 : IsFixedSizeList(format)))
7161 : {
7162 190 : const char *childFormat = schema->children[0]->format;
7163 1565 : for (const auto &sType : gasListTypes)
7164 : {
7165 1544 : if (childFormat[0] == sType.arrowLetter &&
7166 169 : childFormat[1] == 0)
7167 : {
7168 169 : sInfo.eNominalFieldType = sType.eType;
7169 169 : if (eOGRType == sInfo.eNominalFieldType)
7170 : {
7171 154 : bTypeOK = true;
7172 154 : break;
7173 : }
7174 15 : else if (eOGRType == OFTString)
7175 : {
7176 15 : bFallbackTypesUsed = true;
7177 15 : bTypeOK = true;
7178 15 : break;
7179 : }
7180 : else
7181 : {
7182 0 : CPLError(CE_Failure, CPLE_AppDefined,
7183 : "For field %s, OGR field type is %s "
7184 : "whereas "
7185 : "Arrow type implies %s",
7186 : sInfo.osName.c_str(),
7187 : OGR_GetFieldTypeName(eOGRType),
7188 0 : OGR_GetFieldTypeName(sType.eType));
7189 0 : return false;
7190 : }
7191 : }
7192 : }
7193 :
7194 190 : if (!bTypeOK && IsStringView(childFormat))
7195 : {
7196 0 : sInfo.eNominalFieldType = OFTStringList;
7197 0 : if (eOGRType == sInfo.eNominalFieldType)
7198 : {
7199 0 : bTypeOK = true;
7200 : }
7201 0 : else if (eOGRType == OFTString)
7202 : {
7203 0 : bFallbackTypesUsed = true;
7204 0 : bTypeOK = true;
7205 : }
7206 : else
7207 : {
7208 0 : CPLError(CE_Failure, CPLE_AppDefined,
7209 : "For field %s, OGR field type is %s "
7210 : "whereas "
7211 : "Arrow type implies %s",
7212 : sInfo.osName.c_str(),
7213 : OGR_GetFieldTypeName(eOGRType),
7214 : OGR_GetFieldTypeName(OFTStringList));
7215 0 : return false;
7216 : }
7217 : }
7218 :
7219 190 : if (!bTypeOK && IsDecimal(childFormat))
7220 : {
7221 11 : if (!ParseDecimalFormat(childFormat, sInfo.nPrecision,
7222 : sInfo.nScale, sInfo.nWidthInBytes))
7223 : {
7224 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
7225 0 : (std::string("Invalid field format ") +
7226 0 : childFormat + " for field " + osFieldPrefix +
7227 : fieldName)
7228 : .c_str());
7229 0 : return false;
7230 : }
7231 :
7232 11 : const char *pszError = GetErrorIfUnsupportedDecimal(
7233 : sInfo.nWidthInBytes, sInfo.nPrecision);
7234 11 : if (pszError)
7235 : {
7236 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
7237 0 : return false;
7238 : }
7239 :
7240 11 : sInfo.eNominalFieldType = OFTRealList;
7241 11 : if (eOGRType == sInfo.eNominalFieldType)
7242 : {
7243 11 : bTypeOK = true;
7244 : }
7245 0 : else if (eOGRType == OFTString)
7246 : {
7247 0 : bFallbackTypesUsed = true;
7248 0 : bTypeOK = true;
7249 : }
7250 : else
7251 : {
7252 0 : CPLError(CE_Failure, CPLE_AppDefined,
7253 : "For field %s, OGR field type is %s whereas "
7254 : "Arrow type implies %s",
7255 : sInfo.osName.c_str(),
7256 : OGR_GetFieldTypeName(eOGRType),
7257 : OGR_GetFieldTypeName(OFTRealList));
7258 0 : return false;
7259 : }
7260 : }
7261 :
7262 190 : if (!bTypeOK && IsSupportForJSONObj(schema->children[0]))
7263 : {
7264 10 : sInfo.eNominalFieldType = OFTString;
7265 10 : if (eOGRType == sInfo.eNominalFieldType)
7266 : {
7267 10 : bTypeOK = true;
7268 : }
7269 : else
7270 : {
7271 0 : CPLError(CE_Failure, CPLE_AppDefined,
7272 : "For field %s, OGR field type is %s whereas "
7273 : "Arrow type implies %s",
7274 : sInfo.osName.c_str(),
7275 : OGR_GetFieldTypeName(eOGRType),
7276 : OGR_GetFieldTypeName(OFTString));
7277 0 : return false;
7278 : }
7279 : }
7280 :
7281 190 : if (!bTypeOK)
7282 : {
7283 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
7284 0 : ("List of type '" + std::string(childFormat) +
7285 0 : "' for field " + osFieldPrefix + fieldName +
7286 : " is not supported.")
7287 : .c_str());
7288 0 : return false;
7289 : }
7290 : }
7291 :
7292 655 : if (!bTypeOK && IsDecimal(format))
7293 : {
7294 10 : if (!ParseDecimalFormat(format, sInfo.nPrecision, sInfo.nScale,
7295 : sInfo.nWidthInBytes))
7296 : {
7297 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
7298 0 : (std::string("Invalid field format ") + format +
7299 0 : " for field " + osFieldPrefix + fieldName)
7300 : .c_str());
7301 0 : return false;
7302 : }
7303 :
7304 10 : const char *pszError = GetErrorIfUnsupportedDecimal(
7305 : sInfo.nWidthInBytes, sInfo.nPrecision);
7306 10 : if (pszError)
7307 : {
7308 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
7309 0 : return false;
7310 : }
7311 :
7312 10 : sInfo.eNominalFieldType = OFTReal;
7313 10 : if (eOGRType == sInfo.eNominalFieldType)
7314 : {
7315 10 : bTypeOK = true;
7316 : }
7317 0 : else if (eOGRType == OFTString)
7318 : {
7319 0 : bFallbackTypesUsed = true;
7320 0 : bTypeOK = true;
7321 : }
7322 : else
7323 : {
7324 0 : CPLError(CE_Failure, CPLE_AppDefined,
7325 : "For field %s, OGR field type is %s whereas "
7326 : "Arrow type implies %s",
7327 : sInfo.osName.c_str(),
7328 : OGR_GetFieldTypeName(eOGRType),
7329 : OGR_GetFieldTypeName(OFTReal));
7330 0 : return false;
7331 : }
7332 : }
7333 :
7334 655 : if (!bTypeOK)
7335 : {
7336 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
7337 0 : ("Type '" + std::string(format) + "' for field " +
7338 0 : osFieldPrefix + fieldName + " is not supported.")
7339 : .c_str());
7340 0 : return false;
7341 : }
7342 : }
7343 : else
7344 : {
7345 80 : sInfo.iOGRFieldIdx = poFeatureDefn->GetGeomFieldIndex(
7346 80 : osExpectedOGRFieldName.c_str());
7347 80 : if (sInfo.iOGRFieldIdx < 0)
7348 : {
7349 52 : if (pszGeomFieldName && pszGeomFieldName == sInfo.osName)
7350 : {
7351 47 : if (poFeatureDefn->GetGeomFieldCount() == 0)
7352 : {
7353 0 : CPLError(CE_Failure, CPLE_AppDefined,
7354 : "Cannot find OGR geometry field for Arrow "
7355 : "array %s",
7356 : sInfo.osName.c_str());
7357 0 : return false;
7358 : }
7359 47 : sInfo.iOGRFieldIdx = 0;
7360 : }
7361 : else
7362 : {
7363 : // Check if ARROW:extension:name = ogc.wkb or geoarrow.wkb
7364 5 : const char *pabyMetadata = schema->metadata;
7365 5 : if (pabyMetadata)
7366 : {
7367 : const auto oMetadata =
7368 5 : OGRParseArrowMetadata(pabyMetadata);
7369 5 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
7370 10 : if (oIter != oMetadata.end() &&
7371 5 : (oIter->second == EXTENSION_NAME_OGC_WKB ||
7372 0 : oIter->second == EXTENSION_NAME_GEOARROW_WKB))
7373 : {
7374 5 : if (poFeatureDefn->GetGeomFieldCount() == 0)
7375 : {
7376 0 : CPLError(CE_Failure, CPLE_AppDefined,
7377 : "Cannot find OGR geometry field "
7378 : "for Arrow array %s",
7379 : sInfo.osName.c_str());
7380 0 : return false;
7381 : }
7382 5 : sInfo.iOGRFieldIdx = 0;
7383 : }
7384 : }
7385 : }
7386 :
7387 52 : if (sInfo.iOGRFieldIdx < 0)
7388 : {
7389 0 : CPLError(CE_Failure, CPLE_AppDefined,
7390 : "Cannot find OGR field for Arrow array %s",
7391 : sInfo.osName.c_str());
7392 0 : return false;
7393 : }
7394 : }
7395 :
7396 80 : if (!IsBinary(format) && !IsLargeBinary(format))
7397 : {
7398 0 : CPLError(CE_Failure, CPLE_AppDefined,
7399 : "Geometry column '%s' should be of Arrow format "
7400 : "'z' (binary) or 'Z' (large binary)",
7401 : sInfo.osName.c_str());
7402 0 : return false;
7403 : }
7404 80 : sInfo.bIsGeomCol = true;
7405 : }
7406 : }
7407 :
7408 769 : asFieldInfo.emplace_back(std::move(sInfo));
7409 769 : return true;
7410 : }
7411 :
7412 : /************************************************************************/
7413 : /* GetUInt64Value() */
7414 : /************************************************************************/
7415 :
7416 90 : static inline uint64_t GetUInt64Value(const struct ArrowSchema *schema,
7417 : const struct ArrowArray *array,
7418 : size_t iFeature)
7419 : {
7420 90 : uint64_t nVal = 0;
7421 90 : CPLAssert(schema->format[1] == 0);
7422 90 : switch (schema->format[0])
7423 : {
7424 8 : case ARROW_LETTER_INT8:
7425 8 : nVal = GetValue<int8_t>(array, iFeature);
7426 8 : break;
7427 8 : case ARROW_LETTER_UINT8:
7428 8 : nVal = GetValue<uint8_t>(array, iFeature);
7429 8 : break;
7430 8 : case ARROW_LETTER_INT16:
7431 8 : nVal = GetValue<int16_t>(array, iFeature);
7432 8 : break;
7433 8 : case ARROW_LETTER_UINT16:
7434 8 : nVal = GetValue<uint16_t>(array, iFeature);
7435 8 : break;
7436 34 : case ARROW_LETTER_INT32:
7437 34 : nVal = GetValue<int32_t>(array, iFeature);
7438 34 : break;
7439 8 : case ARROW_LETTER_UINT32:
7440 8 : nVal = GetValue<uint32_t>(array, iFeature);
7441 8 : break;
7442 8 : case ARROW_LETTER_INT64:
7443 8 : nVal = GetValue<int64_t>(array, iFeature);
7444 8 : break;
7445 8 : case ARROW_LETTER_UINT64:
7446 8 : nVal = GetValue<uint64_t>(array, iFeature);
7447 8 : break;
7448 0 : default:
7449 : // Shouldn't happen given checks in BuildOGRFieldInfo()
7450 0 : CPLAssert(false);
7451 : break;
7452 : }
7453 90 : return nVal;
7454 : }
7455 :
7456 : /************************************************************************/
7457 : /* GetWorkingBufferSize() */
7458 : /************************************************************************/
7459 :
7460 1382830 : static size_t GetWorkingBufferSize(const struct ArrowSchema *schema,
7461 : const struct ArrowArray *array,
7462 : size_t iFeature, int &iArrowIdxInOut,
7463 : const std::vector<FieldInfo> &asFieldInfo)
7464 : {
7465 1382830 : const char *fieldName = schema->name;
7466 1382830 : const char *format = schema->format;
7467 1382830 : const int iArrowIdx = iArrowIdxInOut;
7468 1382830 : if (IsStructure(format))
7469 : {
7470 60426 : if (asFieldInfo[iArrowIdx].eNominalFieldType == OFTDateTime)
7471 : {
7472 0 : ++iArrowIdxInOut;
7473 0 : return 0;
7474 : }
7475 :
7476 60426 : size_t nRet = 0;
7477 1382850 : for (int64_t i = 0; i < array->n_children; ++i)
7478 : {
7479 1322420 : nRet += GetWorkingBufferSize(
7480 1322420 : schema->children[i], array->children[i],
7481 1322420 : iFeature + static_cast<size_t>(array->offset), iArrowIdxInOut,
7482 : asFieldInfo);
7483 : }
7484 60426 : return nRet;
7485 : }
7486 1322400 : ++iArrowIdxInOut;
7487 :
7488 1322400 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7489 122188 : return 0;
7490 :
7491 1200210 : const uint8_t *pabyValidity =
7492 1200210 : static_cast<const uint8_t *>(array->buffers[0]);
7493 1200340 : if (array->null_count != 0 && pabyValidity &&
7494 121 : !TestBit(pabyValidity, static_cast<size_t>(iFeature + array->offset)))
7495 : {
7496 : // empty string
7497 57 : return 0;
7498 : }
7499 :
7500 1200160 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7501 : {
7502 41 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7503 41 : const auto dictArray = array->dictionary;
7504 41 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7505 : {
7506 1 : CPLError(CE_Failure, CPLE_AppDefined,
7507 : "Feature %" PRIu64
7508 : ", field %s: invalid dictionary index: %" PRIu64,
7509 : static_cast<uint64_t>(iFeature), fieldName, nDictIdx);
7510 1 : return 0;
7511 : }
7512 :
7513 40 : array = dictArray;
7514 40 : schema = schema->dictionary;
7515 40 : format = schema->format;
7516 40 : iFeature = static_cast<size_t>(nDictIdx);
7517 : }
7518 :
7519 1200160 : constexpr size_t SZ_NUL_TERMINATOR = 1;
7520 1200160 : if (IsString(format))
7521 : {
7522 1200150 : const auto *panOffsets =
7523 1200150 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset;
7524 1200150 : return (panOffsets[iFeature + 1] - panOffsets[iFeature]) +
7525 1200150 : SZ_NUL_TERMINATOR;
7526 : }
7527 10 : else if (IsLargeString(format))
7528 : {
7529 10 : const auto *panOffsets =
7530 10 : static_cast<const uint64_t *>(array->buffers[1]) + array->offset;
7531 10 : return static_cast<size_t>(panOffsets[iFeature + 1] -
7532 10 : panOffsets[iFeature]) +
7533 10 : SZ_NUL_TERMINATOR;
7534 : }
7535 0 : else if (IsStringView(format))
7536 : {
7537 0 : const auto *panStringView =
7538 0 : static_cast<const uint32_t *>(array->buffers[1]) +
7539 0 : array->offset * N_VALUES_PER_STRING_VIEW;
7540 0 : return panStringView[iFeature * N_VALUES_PER_STRING_VIEW] +
7541 0 : SZ_NUL_TERMINATOR;
7542 : }
7543 0 : return 0;
7544 : }
7545 :
7546 : /************************************************************************/
7547 : /* FillField() */
7548 : /************************************************************************/
7549 :
7550 : template <typename ArrowType, typename OGRType = ArrowType>
7551 462 : inline static void FillField(const struct ArrowArray *array, int iOGRFieldIdx,
7552 : size_t iFeature, OGRFeature &oFeature)
7553 : {
7554 462 : const auto *panValues = static_cast<const ArrowType *>(array->buffers[1]);
7555 462 : oFeature.SetFieldSameTypeUnsafe(
7556 : iOGRFieldIdx,
7557 462 : static_cast<OGRType>(panValues[iFeature + array->offset]));
7558 462 : }
7559 :
7560 : /************************************************************************/
7561 : /* FillFieldString() */
7562 : /************************************************************************/
7563 :
7564 : template <typename OffsetType>
7565 : inline static void
7566 1200160 : FillFieldString(const struct ArrowArray *array, int iOGRFieldIdx,
7567 : size_t iFeature, int iArrowIdx,
7568 : const std::vector<FieldInfo> &asFieldInfo,
7569 : std::string &osWorkingBuffer, OGRFeature &oFeature)
7570 : {
7571 1200160 : const auto *panOffsets =
7572 1200160 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7573 1200160 : const char *pszStr = static_cast<const char *>(array->buffers[2]);
7574 1200160 : const size_t nLen =
7575 1200160 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7576 1200160 : if (asFieldInfo[iArrowIdx].bUseStringOptim)
7577 : {
7578 1200160 : oFeature.SetFieldSameTypeUnsafe(
7579 1200160 : iOGRFieldIdx, &osWorkingBuffer[0] + osWorkingBuffer.size());
7580 1200160 : osWorkingBuffer.append(pszStr + panOffsets[iFeature], nLen);
7581 1200160 : osWorkingBuffer.push_back(0); // append null character
7582 : }
7583 : else
7584 : {
7585 0 : oFeature.SetField(iOGRFieldIdx, std::string_view(pszStr, nLen));
7586 : }
7587 1200160 : }
7588 :
7589 : /************************************************************************/
7590 : /* FillFieldStringView() */
7591 : /************************************************************************/
7592 :
7593 : inline static void
7594 0 : FillFieldStringView(const struct ArrowArray *array, int iOGRFieldIdx,
7595 : size_t iFeature, int iArrowIdx,
7596 : const std::vector<FieldInfo> &asFieldInfo,
7597 : std::string &osWorkingBuffer, OGRFeature &oFeature)
7598 : {
7599 0 : const auto sv = GetStringView(array, iFeature);
7600 0 : if (asFieldInfo[iArrowIdx].bUseStringOptim)
7601 : {
7602 0 : oFeature.SetFieldSameTypeUnsafe(
7603 0 : iOGRFieldIdx, &osWorkingBuffer[0] + osWorkingBuffer.size());
7604 0 : osWorkingBuffer.append(sv);
7605 0 : osWorkingBuffer.push_back(0); // append null character
7606 : }
7607 : else
7608 : {
7609 0 : oFeature.SetField(iOGRFieldIdx, sv);
7610 : }
7611 0 : }
7612 :
7613 : /************************************************************************/
7614 : /* FillFieldBinary() */
7615 : /************************************************************************/
7616 :
7617 : template <typename OffsetType>
7618 : inline static bool
7619 60344 : FillFieldBinary(const struct ArrowArray *array, int iOGRFieldIdx,
7620 : size_t iFeature, int iArrowIdx,
7621 : const std::vector<FieldInfo> &asFieldInfo,
7622 : const std::string &osFieldPrefix, const char *pszFieldName,
7623 : OGRFeature &oFeature)
7624 : {
7625 60344 : const auto *panOffsets =
7626 60344 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7627 60344 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]) +
7628 60344 : static_cast<size_t>(panOffsets[iFeature]);
7629 60344 : const size_t nLen =
7630 60344 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7631 60344 : if (asFieldInfo[iArrowIdx].bIsGeomCol)
7632 : {
7633 60316 : size_t nBytesConsumedOut = 0;
7634 :
7635 : // Check if we can reuse the existing geometry, to save dynamic memory
7636 : // allocations.
7637 60316 : if (nLen >= 5 && pabyData[0] == wkbNDR && pabyData[1] <= wkbTriangle &&
7638 60309 : pabyData[2] == 0 && pabyData[3] == 0 && pabyData[4] == 0)
7639 : {
7640 60309 : const auto poExistingGeom = oFeature.GetGeomFieldRef(iOGRFieldIdx);
7641 120570 : if (poExistingGeom &&
7642 60261 : poExistingGeom->getGeometryType() == pabyData[1])
7643 : {
7644 60261 : poExistingGeom->importFromWkb(pabyData, nLen, wkbVariantIso,
7645 : nBytesConsumedOut);
7646 60261 : return true;
7647 : }
7648 : }
7649 :
7650 55 : OGRGeometry *poGeometry = nullptr;
7651 55 : OGRGeometryFactory::createFromWkb(pabyData, nullptr, &poGeometry, nLen,
7652 : wkbVariantIso, nBytesConsumedOut);
7653 55 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, poGeometry);
7654 : }
7655 : else
7656 : {
7657 28 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
7658 : {
7659 0 : CPLError(CE_Failure, CPLE_NotSupported,
7660 : "Content for field %s%s is too large",
7661 : osFieldPrefix.c_str(), pszFieldName);
7662 0 : return false;
7663 : }
7664 28 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(nLen), pabyData);
7665 : }
7666 83 : return true;
7667 : }
7668 :
7669 : /************************************************************************/
7670 : /* FillFeature() */
7671 : /************************************************************************/
7672 :
7673 1322420 : static bool FillFeature(OGRLayer *poLayer, const struct ArrowSchema *schema,
7674 : const struct ArrowArray *array,
7675 : const std::string &osFieldPrefix, size_t iFeature,
7676 : int &iArrowIdxInOut,
7677 : const std::vector<FieldInfo> &asFieldInfo,
7678 : OGRFeature &oFeature, std::string &osWorkingBuffer)
7679 :
7680 : {
7681 1322420 : const char *fieldName = schema->name;
7682 1322420 : const char *format = schema->format;
7683 1322420 : const int iArrowIdx = iArrowIdxInOut;
7684 1322420 : if (IsStructure(format))
7685 : {
7686 19 : if (asFieldInfo[iArrowIdx].eNominalFieldType == OFTDateTime)
7687 : {
7688 0 : ++iArrowIdxInOut;
7689 0 : const int iOGRFieldIdx = asFieldInfo[iArrowIdx].iOGRFieldIdx;
7690 :
7691 0 : if (array->null_count != 0)
7692 : {
7693 0 : const uint8_t *pabyValidity =
7694 0 : static_cast<const uint8_t *>(array->buffers[0]);
7695 0 : if (pabyValidity &&
7696 0 : !TestBit(pabyValidity,
7697 0 : static_cast<size_t>(iFeature + array->offset)))
7698 : {
7699 0 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7700 0 : OGR_RawField_SetNull(psField);
7701 : }
7702 : }
7703 :
7704 0 : const auto *panTimestamps =
7705 0 : static_cast<const int64_t *>(array->children[0]->buffers[1]);
7706 0 : int64_t nTimestamp = panTimestamps[iFeature + array->offset];
7707 0 : const auto *panOffsetsMinutes =
7708 0 : static_cast<const int16_t *>(array->children[1]->buffers[1]);
7709 0 : const int nOffsetMinute =
7710 0 : panOffsetsMinutes[iFeature + array->offset];
7711 0 : const int nTZFlag =
7712 0 : nOffsetMinute >= -14 * 60 && nOffsetMinute <= 14 * 60
7713 0 : ? OGR_TZFLAG_UTC + nOffsetMinute / 15
7714 : : OGR_TZFLAG_UTC;
7715 :
7716 0 : const char *formatTS = schema->children[0]->format;
7717 : const int nInvFactorToSecond =
7718 0 : IsTimestampSeconds(formatTS) ? 1
7719 0 : : IsTimestampMilliseconds(formatTS) ? 1000
7720 0 : : IsTimestampMicroseconds(formatTS) ? 1000 * 1000
7721 0 : : IsTimestampNanoseconds(formatTS) ? 1000 * 1000 * 1000
7722 0 : : 1;
7723 0 : double floatingPart = 0;
7724 0 : if (nInvFactorToSecond)
7725 : {
7726 0 : floatingPart = (nTimestamp % nInvFactorToSecond) /
7727 0 : double(nInvFactorToSecond);
7728 0 : nTimestamp /= nInvFactorToSecond;
7729 : }
7730 0 : nTimestamp += (nTZFlag - OGR_TZFLAG_UTC) * 15 * 60;
7731 : struct tm dt;
7732 0 : CPLUnixTimeToYMDHMS(nTimestamp, &dt);
7733 0 : oFeature.SetField(iOGRFieldIdx, dt.tm_year + 1900, dt.tm_mon + 1,
7734 : dt.tm_mday, dt.tm_hour, dt.tm_min,
7735 0 : static_cast<float>(dt.tm_sec + floatingPart),
7736 : nTZFlag);
7737 : }
7738 : else
7739 : {
7740 19 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
7741 78 : for (int64_t i = 0; i < array->n_children; ++i)
7742 : {
7743 59 : if (!FillFeature(poLayer, schema->children[i],
7744 59 : array->children[i], osNewPrefix,
7745 59 : iFeature + static_cast<size_t>(array->offset),
7746 : iArrowIdxInOut, asFieldInfo, oFeature,
7747 : osWorkingBuffer))
7748 0 : return false;
7749 : }
7750 : }
7751 19 : return true;
7752 : }
7753 1322400 : ++iArrowIdxInOut;
7754 1322400 : const int iOGRFieldIdx = asFieldInfo[iArrowIdx].iOGRFieldIdx;
7755 :
7756 1322400 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7757 : {
7758 62 : format = schema->dictionary->format;
7759 : }
7760 :
7761 1322400 : if (array->null_count != 0)
7762 : {
7763 893 : const uint8_t *pabyValidity =
7764 893 : static_cast<const uint8_t *>(array->buffers[0]);
7765 1786 : if (pabyValidity &&
7766 893 : !TestBit(pabyValidity,
7767 893 : static_cast<size_t>(iFeature + array->offset)))
7768 : {
7769 298 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7770 5 : oFeature.SetFID(OGRNullFID);
7771 293 : else if (asFieldInfo[iArrowIdx].bIsGeomCol)
7772 70 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, nullptr);
7773 223 : else if (asFieldInfo[iArrowIdx].eSetFeatureFieldType == OFTString)
7774 : {
7775 120 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7776 120 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7777 : {
7778 63 : if (IsValidField(psField))
7779 : {
7780 51 : CPLFree(psField->String);
7781 51 : OGR_RawField_SetNull(psField);
7782 : }
7783 : }
7784 : else
7785 : {
7786 57 : OGR_RawField_SetNull(psField);
7787 : }
7788 : }
7789 : else
7790 : {
7791 103 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7792 103 : switch (asFieldInfo[iArrowIdx].eSetFeatureFieldType)
7793 : {
7794 47 : case OFTRealList:
7795 : case OFTIntegerList:
7796 : case OFTInteger64List:
7797 47 : if (IsValidField(psField))
7798 47 : CPLFree(psField->IntegerList.paList);
7799 47 : break;
7800 :
7801 7 : case OFTStringList:
7802 7 : if (IsValidField(psField))
7803 7 : CSLDestroy(psField->StringList.paList);
7804 7 : break;
7805 :
7806 1 : case OFTBinary:
7807 1 : if (IsValidField(psField))
7808 1 : CPLFree(psField->Binary.paData);
7809 1 : break;
7810 :
7811 48 : default:
7812 48 : break;
7813 : }
7814 103 : OGR_RawField_SetNull(psField);
7815 : }
7816 298 : return true;
7817 : }
7818 : }
7819 :
7820 1322100 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7821 : {
7822 49 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7823 49 : auto dictArray = array->dictionary;
7824 49 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7825 : {
7826 2 : CPLError(CE_Failure, CPLE_AppDefined,
7827 : "Feature %" PRIu64
7828 : ", field %s: invalid dictionary index: %" PRIu64,
7829 : static_cast<uint64_t>(iFeature),
7830 4 : (osFieldPrefix + fieldName).c_str(), nDictIdx);
7831 2 : return false;
7832 : }
7833 47 : array = dictArray;
7834 47 : schema = schema->dictionary;
7835 47 : iFeature = static_cast<size_t>(nDictIdx);
7836 : }
7837 :
7838 1322100 : if (IsBoolean(format))
7839 : {
7840 12 : const uint8_t *pabyValues =
7841 12 : static_cast<const uint8_t *>(array->buffers[1]);
7842 12 : oFeature.SetFieldSameTypeUnsafe(
7843 : iOGRFieldIdx,
7844 12 : TestBit(pabyValues, static_cast<size_t>(iFeature + array->offset))
7845 : ? 1
7846 : : 0);
7847 12 : return true;
7848 : }
7849 1322090 : else if (IsInt8(format))
7850 : {
7851 10 : FillField<int8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7852 10 : return true;
7853 : }
7854 1322080 : else if (IsUInt8(format))
7855 : {
7856 10 : FillField<uint8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7857 10 : return true;
7858 : }
7859 1322070 : else if (IsInt16(format))
7860 : {
7861 12 : FillField<int16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7862 12 : return true;
7863 : }
7864 1322060 : else if (IsUInt16(format))
7865 : {
7866 10 : FillField<uint16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7867 10 : return true;
7868 : }
7869 1322050 : else if (IsInt32(format))
7870 : {
7871 250 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7872 : {
7873 2 : const auto *panValues =
7874 2 : static_cast<const int32_t *>(array->buffers[1]);
7875 2 : oFeature.SetFID(panValues[iFeature + array->offset]);
7876 : }
7877 : else
7878 : {
7879 248 : FillField<int32_t>(array, iOGRFieldIdx, iFeature, oFeature);
7880 : }
7881 250 : return true;
7882 : }
7883 1321800 : else if (IsUInt32(format))
7884 : {
7885 4 : FillField<uint32_t, GIntBig>(array, iOGRFieldIdx, iFeature, oFeature);
7886 4 : return true;
7887 : }
7888 1321790 : else if (IsInt64(format))
7889 : {
7890 60392 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7891 : {
7892 60304 : const auto *panValues =
7893 60304 : static_cast<const int64_t *>(array->buffers[1]);
7894 60304 : oFeature.SetFID(panValues[iFeature + array->offset]);
7895 : }
7896 : else
7897 : {
7898 88 : FillField<int64_t, GIntBig>(array, iOGRFieldIdx, iFeature,
7899 : oFeature);
7900 : }
7901 60392 : return true;
7902 : }
7903 1261400 : else if (IsUInt64(format))
7904 : {
7905 10 : FillField<uint64_t, double>(array, iOGRFieldIdx, iFeature, oFeature);
7906 10 : return true;
7907 : }
7908 1261390 : else if (IsFloat32(format))
7909 : {
7910 12 : FillField<float, double>(array, iOGRFieldIdx, iFeature, oFeature);
7911 12 : return true;
7912 : }
7913 1261380 : else if (IsFloat64(format))
7914 : {
7915 58 : FillField<double>(array, iOGRFieldIdx, iFeature, oFeature);
7916 58 : return true;
7917 : }
7918 1261320 : else if (IsString(format))
7919 : {
7920 1200150 : FillFieldString<uint32_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7921 : asFieldInfo, osWorkingBuffer, oFeature);
7922 1200150 : return true;
7923 : }
7924 61176 : else if (IsLargeString(format))
7925 : {
7926 10 : FillFieldString<uint64_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7927 : asFieldInfo, osWorkingBuffer, oFeature);
7928 10 : return true;
7929 : }
7930 61166 : else if (IsStringView(format))
7931 : {
7932 0 : FillFieldStringView(array, iOGRFieldIdx, iFeature, iArrowIdx,
7933 : asFieldInfo, osWorkingBuffer, oFeature);
7934 0 : return true;
7935 : }
7936 61166 : else if (IsBinary(format))
7937 : {
7938 60328 : return FillFieldBinary<uint32_t>(array, iOGRFieldIdx, iFeature,
7939 : iArrowIdx, asFieldInfo, osFieldPrefix,
7940 60328 : fieldName, oFeature);
7941 : }
7942 838 : else if (IsLargeBinary(format))
7943 : {
7944 16 : return FillFieldBinary<uint64_t>(array, iOGRFieldIdx, iFeature,
7945 : iArrowIdx, asFieldInfo, osFieldPrefix,
7946 16 : fieldName, oFeature);
7947 : }
7948 822 : else if (asFieldInfo[iArrowIdx].nPrecision > 0)
7949 : {
7950 : // fits on a int64
7951 46 : CPLAssert(asFieldInfo[iArrowIdx].nPrecision <= 19);
7952 : // either 128 or 256 bits
7953 46 : CPLAssert((asFieldInfo[iArrowIdx].nWidthInBytes % 8) == 0);
7954 46 : const int nWidthIn64BitWord = asFieldInfo[iArrowIdx].nWidthInBytes / 8;
7955 :
7956 46 : if (IsList(format))
7957 : {
7958 16 : const auto panOffsets =
7959 16 : static_cast<const uint32_t *>(array->buffers[1]) +
7960 16 : array->offset;
7961 16 : const auto childArray = array->children[0];
7962 16 : std::vector<double> aValues;
7963 33 : for (auto i = panOffsets[iFeature]; i < panOffsets[iFeature + 1];
7964 : ++i)
7965 : {
7966 17 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7967 17 : asFieldInfo[iArrowIdx].nScale,
7968 : i));
7969 : }
7970 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7971 16 : aValues.data());
7972 16 : return true;
7973 : }
7974 30 : else if (IsLargeList(format))
7975 : {
7976 4 : const auto panOffsets =
7977 4 : static_cast<const uint64_t *>(array->buffers[1]) +
7978 4 : array->offset;
7979 4 : const auto childArray = array->children[0];
7980 4 : std::vector<double> aValues;
7981 4 : for (auto i = static_cast<size_t>(panOffsets[iFeature]);
7982 9 : i < static_cast<size_t>(panOffsets[iFeature + 1]); ++i)
7983 : {
7984 5 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7985 5 : asFieldInfo[iArrowIdx].nScale,
7986 : i));
7987 : }
7988 4 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7989 4 : aValues.data());
7990 4 : return true;
7991 : }
7992 26 : else if (IsFixedSizeList(format))
7993 : {
7994 4 : const int nVals = GetFixedSizeList(format);
7995 4 : const auto childArray = array->children[0];
7996 4 : std::vector<double> aValues;
7997 12 : for (int i = 0; i < nVals; ++i)
7998 : {
7999 8 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
8000 8 : asFieldInfo[iArrowIdx].nScale,
8001 8 : iFeature * nVals + i));
8002 : }
8003 4 : oFeature.SetField(iOGRFieldIdx, nVals, aValues.data());
8004 4 : return true;
8005 : }
8006 :
8007 22 : CPLAssert(format[0] == ARROW_LETTER_DECIMAL);
8008 :
8009 22 : oFeature.SetFieldSameTypeUnsafe(
8010 : iOGRFieldIdx,
8011 : GetValueDecimal(array, nWidthIn64BitWord,
8012 22 : asFieldInfo[iArrowIdx].nScale, iFeature));
8013 22 : return true;
8014 : }
8015 776 : else if (SetFieldForOtherFormats(
8016 : oFeature, iOGRFieldIdx,
8017 776 : static_cast<size_t>(iFeature + array->offset), schema, array))
8018 : {
8019 776 : return true;
8020 : }
8021 :
8022 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
8023 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
8024 0 : fieldName + " is not supported.")
8025 : .c_str());
8026 0 : return false;
8027 : }
8028 :
8029 : /************************************************************************/
8030 : /* OGRLayer::WriteArrowBatch() */
8031 : /************************************************************************/
8032 :
8033 : // clang-format off
8034 : /** Writes a batch of rows from an ArrowArray.
8035 : *
8036 : * This is semantically close to calling CreateFeature() with multiple features
8037 : * at once.
8038 : *
8039 : * The ArrowArray must be of type struct (format=+s), and its children generally
8040 : * map to a OGR attribute or geometry field (unless they are struct themselves).
8041 : *
8042 : * Method IsArrowSchemaSupported() can be called to determine if the schema
8043 : * will be supported by WriteArrowBatch().
8044 : *
8045 : * OGR fields for the corresponding children arrays must exist and be of a
8046 : * compatible type. For attribute fields, they should generally be created with
8047 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
8048 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
8049 : * they should be created either implicitly at CreateLayer() time
8050 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
8051 : *
8052 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
8053 : * implementation of WriteArrowBatch() for scenarios that involve appending to
8054 : * an already existing output layer when the input Arrow field type and the
8055 : * OGR layer field type are 32/64-bit integers or real numbers, but do not match
8056 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
8057 : * can be used to control the behavior in case of lossy conversion.
8058 : *
8059 : * Arrays for geometry columns should be of binary or large binary type and
8060 : * contain WKB geometry.
8061 : *
8062 : * Note that the passed array may be set to a released state
8063 : * (array->release==NULL) after this call (not by the base implementation,
8064 : * but in specialized ones such as Parquet or Arrow for example)
8065 : *
8066 : * Supported options of the base implementation are:
8067 : * <ul>
8068 : * <li>FID=name. Name of the FID column in the array. If not provided,
8069 : * GetFIDColumn() is used to determine it. The special name
8070 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
8071 : * GetFIDColumn() are set.
8072 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
8073 : * On input, values of the FID column are used to create the feature.
8074 : * On output, the values of the FID column may be set with the FID of the
8075 : * created feature (if the array is not released).
8076 : * </li>
8077 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
8078 : * input FID is not preserved in the output layer. The default is NOTHING.
8079 : * Setting it to ERROR will cause the function to error out. Setting it
8080 : * to WARNING will cause the function to emit a warning but continue its
8081 : * processing.
8082 : * </li>
8083 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
8084 : * Action to perform when the input field value is not preserved in the
8085 : * output layer.
8086 : * The default is WARNING, which will cause the function to emit a warning
8087 : * but continue its processing.
8088 : * Setting it to ERROR will cause the function to error out if a lossy
8089 : * conversion is detected.
8090 : * </li>
8091 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
8092 : * GetGeometryColumn() is used. The special name
8093 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
8094 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
8095 : * Geometry columns are also identified if they have
8096 : * ARROW:extension:name=ogc.wkb as a field metadata.
8097 : * The corresponding ArrowArray must be of type binary (w) or large
8098 : * binary (W).
8099 : * </li>
8100 : * </ul>
8101 : *
8102 : * The following example demonstrates how to copy a layer from one format to
8103 : * another one (assuming it has at most a single geometry column):
8104 : \code{.py}
8105 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
8106 : stream = src_lyr.GetArrowStream()
8107 : schema = stream.GetSchema()
8108 :
8109 : # If the source layer has a FID column and the output driver supports
8110 : # a FID layer creation option, set it to the source FID column name.
8111 : if src_lyr.GetFIDColumn():
8112 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
8113 : "DS_LAYER_CREATIONOPTIONLIST"
8114 : )
8115 : if creationOptions and '"FID"' in creationOptions:
8116 : lcos["FID"] = src_lyr.GetFIDColumn()
8117 :
8118 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
8119 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
8120 : out_lyr = out_ds.CreateLayer(
8121 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
8122 : )
8123 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
8124 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
8125 : else:
8126 : out_lyr = out_ds.CreateLayer(
8127 : src_lyr.GetName(),
8128 : geom_type=src_lyr.GetGeomType(),
8129 : srs=src_lyr.GetSpatialRef(),
8130 : options=lcos,
8131 : )
8132 :
8133 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
8134 : assert success, error_msg
8135 :
8136 : src_geom_field_names = [
8137 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
8138 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
8139 : ]
8140 : for i in range(schema.GetChildrenCount()):
8141 : # GetArrowStream() may return "OGC_FID" for a unnamed source FID
8142 : # column and "wkb_geometry" for a unnamed source geometry column.
8143 : # Also test GetFIDColumn() and src_geom_field_names if they are
8144 : # named.
8145 : if (
8146 : schema.GetChild(i).GetName()
8147 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
8148 : and schema.GetChild(i).GetName() not in src_geom_field_names
8149 : ):
8150 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
8151 :
8152 : write_options = []
8153 : if src_lyr.GetFIDColumn():
8154 : write_options.append("FID=" + src_lyr.GetFIDColumn())
8155 : if (
8156 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
8157 : and src_lyr.GetGeometryColumn()
8158 : ):
8159 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
8160 :
8161 : while True:
8162 : array = stream.GetNextRecordBatch()
8163 : if array is None:
8164 : break
8165 : out_lyr.WriteArrowBatch(schema, array, write_options)
8166 : \endcode
8167 : *
8168 : * This method and CreateFeature() are mutually exclusive in the same session.
8169 : *
8170 : * This method is the same as the C function OGR_L_WriteArrowBatch().
8171 : *
8172 : * @param schema Schema of array
8173 : * @param array Array of type struct. It may be released (array->release==NULL)
8174 : * after calling this method.
8175 : * @param papszOptions Options. Null terminated list, or nullptr.
8176 : * @return true in case of success
8177 : * @since 3.8
8178 : */
8179 : // clang-format on
8180 :
8181 88 : bool OGRLayer::WriteArrowBatch(const struct ArrowSchema *schema,
8182 : struct ArrowArray *array,
8183 : CSLConstList papszOptions)
8184 : {
8185 88 : const char *format = schema->format;
8186 88 : if (!IsStructure(format))
8187 : {
8188 0 : CPLError(CE_Failure, CPLE_AppDefined,
8189 : "WriteArrowBatch() should be called on a schema that is a "
8190 : "struct of fields");
8191 0 : return false;
8192 : }
8193 :
8194 88 : if (schema->n_children != array->n_children)
8195 : {
8196 0 : CPLError(CE_Failure, CPLE_AppDefined,
8197 : "WriteArrowBatch(): schema->n_children (%d) != "
8198 : "array->n_children (%d)",
8199 0 : int(schema->n_children), int(array->n_children));
8200 0 : return false;
8201 : }
8202 :
8203 176 : CPLStringList aosNativeTypes;
8204 88 : auto poDS = const_cast<OGRLayer *>(this)->GetDataset();
8205 88 : if (poDS)
8206 : {
8207 88 : auto poDriver = poDS->GetDriver();
8208 88 : if (poDriver)
8209 : {
8210 : const char *pszMetadataItem =
8211 88 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
8212 88 : if (pszMetadataItem)
8213 88 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
8214 : }
8215 : }
8216 :
8217 176 : std::vector<FieldInfo> asFieldInfo;
8218 88 : auto poLayerDefn = GetLayerDefn();
8219 : const char *pszFIDName =
8220 88 : CSLFetchNameValueDef(papszOptions, "FID", GetFIDColumn());
8221 88 : if (!pszFIDName || pszFIDName[0] == 0)
8222 60 : pszFIDName = DEFAULT_ARROW_FID_NAME;
8223 : const bool bErrorIfFIDNotPreserved =
8224 88 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
8225 : "ERROR");
8226 : const bool bWarningIfFIDNotPreserved =
8227 88 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
8228 : "WARNING");
8229 : const bool bErrorIfFieldNotPreserved =
8230 88 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FIELD_NOT_PRESERVED", ""),
8231 : "ERROR");
8232 88 : const char *pszGeomFieldName = CSLFetchNameValueDef(
8233 88 : papszOptions, "GEOMETRY_NAME", GetGeometryColumn());
8234 88 : if (!pszGeomFieldName || pszGeomFieldName[0] == 0)
8235 59 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
8236 88 : const struct ArrowSchema *schemaFIDColumn = nullptr;
8237 88 : struct ArrowArray *arrayFIDColumn = nullptr;
8238 88 : bool bFallbackTypesUsed = false;
8239 836 : for (int64_t i = 0; i < schema->n_children; ++i)
8240 : {
8241 749 : if (!BuildOGRFieldInfo(schema->children[i], array->children[i],
8242 749 : poLayerDefn, std::string(), aosNativeTypes,
8243 : bFallbackTypesUsed, asFieldInfo, pszFIDName,
8244 : pszGeomFieldName, this,
8245 749 : m_poPrivate->m_oMapArrowFieldNameToOGRFieldName,
8246 : schemaFIDColumn, arrayFIDColumn))
8247 : {
8248 1 : return false;
8249 : }
8250 : }
8251 :
8252 174 : std::map<int, int> oMapOGRFieldIndexToFieldInfoIndex;
8253 174 : std::vector<bool> abUseStringOptim(poLayerDefn->GetFieldCount(), false);
8254 855 : for (int i = 0; i < static_cast<int>(asFieldInfo.size()); ++i)
8255 : {
8256 768 : if (asFieldInfo[i].iOGRFieldIdx >= 0 && !asFieldInfo[i].bIsGeomCol)
8257 : {
8258 654 : CPLAssert(oMapOGRFieldIndexToFieldInfoIndex.find(
8259 : asFieldInfo[i].iOGRFieldIdx) ==
8260 : oMapOGRFieldIndexToFieldInfoIndex.end());
8261 654 : oMapOGRFieldIndexToFieldInfoIndex[asFieldInfo[i].iOGRFieldIdx] = i;
8262 1308 : abUseStringOptim[asFieldInfo[i].iOGRFieldIdx] =
8263 1308 : asFieldInfo[i].bUseStringOptim;
8264 : }
8265 : }
8266 :
8267 : struct FeatureDefnReleaser
8268 : {
8269 87 : void operator()(OGRFeatureDefn *poFDefn)
8270 : {
8271 87 : if (poFDefn)
8272 87 : poFDefn->Release();
8273 87 : }
8274 : };
8275 :
8276 : std::unique_ptr<OGRFeatureDefn, FeatureDefnReleaser> poLayerDefnTmp(
8277 174 : std::make_unique<OGRFeatureDefn>(poLayerDefn->GetName()).release());
8278 87 : poLayerDefnTmp->Reference();
8279 :
8280 174 : std::vector<int> anIdentityFieldMap;
8281 87 : if (bFallbackTypesUsed)
8282 : {
8283 30 : poLayerDefnTmp->SetGeomType(wkbNone);
8284 101 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
8285 : {
8286 71 : anIdentityFieldMap.push_back(i);
8287 71 : const auto poSrcFieldDefn = poLayerDefn->GetFieldDefn(i);
8288 71 : const auto oIter = oMapOGRFieldIndexToFieldInfoIndex.find(i);
8289 : OGRFieldDefn oFieldDefn(
8290 : poSrcFieldDefn->GetNameRef(),
8291 71 : oIter == oMapOGRFieldIndexToFieldInfoIndex.end()
8292 1 : ? poSrcFieldDefn->GetType()
8293 143 : : asFieldInfo[oIter->second].eNominalFieldType);
8294 71 : if (oIter != oMapOGRFieldIndexToFieldInfoIndex.end())
8295 70 : asFieldInfo[oIter->second].eSetFeatureFieldType =
8296 70 : asFieldInfo[oIter->second].eNominalFieldType;
8297 71 : poLayerDefnTmp->AddFieldDefn(&oFieldDefn);
8298 : }
8299 59 : for (int i = 0; i < poLayerDefn->GetGeomFieldCount(); ++i)
8300 : {
8301 29 : poLayerDefnTmp->AddGeomFieldDefn(poLayerDefn->GetGeomFieldDefn(i));
8302 : }
8303 : }
8304 : else
8305 : {
8306 723 : for (auto &sFieldInfo : asFieldInfo)
8307 666 : sFieldInfo.eSetFeatureFieldType = sFieldInfo.eTargetFieldType;
8308 : }
8309 :
8310 : struct FeatureCleaner
8311 : {
8312 : OGRFeature &m_oFeature;
8313 : const std::vector<bool> &m_abUseStringOptim;
8314 :
8315 87 : explicit FeatureCleaner(OGRFeature &oFeature,
8316 : const std::vector<bool> &abUseStringOptim)
8317 87 : : m_oFeature(oFeature), m_abUseStringOptim(abUseStringOptim)
8318 : {
8319 87 : }
8320 :
8321 : // As we set a value that can't be CPLFree()'d in the .String member
8322 : // of string fields, we must take care of manually unsetting it before
8323 : // the destructor of OGRFeature gets called.
8324 87 : ~FeatureCleaner()
8325 87 : {
8326 87 : const auto poLayerDefn = m_oFeature.GetDefnRef();
8327 87 : const int nFieldCount = poLayerDefn->GetFieldCount();
8328 745 : for (int i = 0; i < nFieldCount; ++i)
8329 : {
8330 658 : if (m_abUseStringOptim[i])
8331 : {
8332 130 : if (m_oFeature.IsFieldSetAndNotNullUnsafe(i))
8333 102 : m_oFeature.SetFieldSameTypeUnsafe(
8334 : i, static_cast<char *>(nullptr));
8335 : }
8336 : }
8337 87 : }
8338 : };
8339 :
8340 30 : OGRFeature oFeature(bFallbackTypesUsed ? poLayerDefnTmp.get()
8341 204 : : poLayerDefn);
8342 174 : FeatureCleaner oCleaner(oFeature, abUseStringOptim);
8343 174 : OGRFeature oFeatureTarget(poLayerDefn);
8344 87 : OGRFeature *const poFeatureTarget =
8345 87 : bFallbackTypesUsed ? &oFeatureTarget : &oFeature;
8346 :
8347 : // We accumulate the content of all strings in osWorkingBuffer to avoid
8348 : // a few dynamic memory allocations
8349 174 : std::string osWorkingBuffer;
8350 :
8351 : bool bTransactionOK;
8352 : {
8353 87 : CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler);
8354 87 : bTransactionOK = StartTransaction() == OGRERR_NONE;
8355 : }
8356 :
8357 174 : const std::string emptyString;
8358 87 : int64_t fidNullCount = 0;
8359 60484 : for (size_t iFeature = 0; iFeature < static_cast<size_t>(array->length);
8360 : ++iFeature)
8361 : {
8362 60407 : oFeature.SetFID(OGRNullFID);
8363 :
8364 60407 : int iArrowIdx = 0;
8365 60407 : const size_t nWorkingBufferSize = GetWorkingBufferSize(
8366 : schema, array, iFeature, iArrowIdx, asFieldInfo);
8367 60407 : osWorkingBuffer.clear();
8368 60407 : osWorkingBuffer.reserve(nWorkingBufferSize);
8369 : #ifdef DEBUG
8370 60407 : const char *pszWorkingBuffer = osWorkingBuffer.c_str();
8371 60407 : CPL_IGNORE_RET_VAL(pszWorkingBuffer);
8372 : #endif
8373 60407 : iArrowIdx = 0;
8374 1382770 : for (int64_t i = 0; i < schema->n_children; ++i)
8375 : {
8376 1322360 : if (!FillFeature(this, schema->children[i], array->children[i],
8377 : emptyString, iFeature, iArrowIdx, asFieldInfo,
8378 : oFeature, osWorkingBuffer))
8379 : {
8380 2 : if (bTransactionOK)
8381 2 : RollbackTransaction();
8382 10 : return false;
8383 : }
8384 : }
8385 : #ifdef DEBUG
8386 : // Check that the buffer didn't get reallocated
8387 60405 : CPLAssert(pszWorkingBuffer == osWorkingBuffer.c_str());
8388 60405 : CPLAssert(osWorkingBuffer.size() == nWorkingBufferSize);
8389 : #endif
8390 :
8391 60405 : if (bFallbackTypesUsed)
8392 : {
8393 48 : oFeatureTarget.SetFrom(&oFeature, anIdentityFieldMap.data(),
8394 : /*bForgiving=*/true,
8395 : /*bUseISO8601ForDateTimeAsString=*/true);
8396 48 : oFeatureTarget.SetFID(oFeature.GetFID());
8397 :
8398 48 : if (bErrorIfFieldNotPreserved)
8399 : {
8400 26 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
8401 : {
8402 16 : if (!oFeature.IsFieldSetAndNotNullUnsafe(i))
8403 : {
8404 4 : continue;
8405 : }
8406 12 : bool bLossyConversion = false;
8407 : const auto eSrcType =
8408 12 : poLayerDefnTmp->GetFieldDefnUnsafe(i)->GetType();
8409 : const auto eDstType =
8410 12 : poLayerDefn->GetFieldDefnUnsafe(i)->GetType();
8411 :
8412 : const auto IsDoubleCastToInt64EqualTInt64 =
8413 2 : [](double dfVal, int64_t nOtherVal)
8414 : {
8415 : // Values in the range [INT64_MAX - 1023, INT64_MAX - 1]
8416 : // get converted to a double that once cast to int64_t
8417 : // is INT64_MAX + 1, hence the strict < comparison
8418 : return dfVal >=
8419 2 : static_cast<double>(
8420 2 : std::numeric_limits<int64_t>::min()) &&
8421 : dfVal <
8422 2 : static_cast<double>(
8423 4 : std::numeric_limits<int64_t>::max()) &&
8424 3 : static_cast<int64_t>(dfVal) == nOtherVal;
8425 : };
8426 :
8427 14 : if (eSrcType == OFTInteger64 && eDstType == OFTInteger &&
8428 2 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
8429 2 : oFeature.GetFieldAsInteger64Unsafe(i))
8430 : {
8431 1 : bLossyConversion = true;
8432 : }
8433 14 : else if (eSrcType == OFTReal && eDstType == OFTInteger &&
8434 3 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
8435 3 : oFeature.GetFieldAsDoubleUnsafe(i))
8436 : {
8437 2 : bLossyConversion = true;
8438 : }
8439 12 : else if (eSrcType == OFTReal && eDstType == OFTInteger64 &&
8440 3 : static_cast<double>(
8441 3 : oFeatureTarget.GetFieldAsInteger64Unsafe(i)) !=
8442 3 : oFeature.GetFieldAsDoubleUnsafe(i))
8443 : {
8444 2 : bLossyConversion = true;
8445 : }
8446 9 : else if (eSrcType == OFTInteger64 && eDstType == OFTReal &&
8447 2 : !IsDoubleCastToInt64EqualTInt64(
8448 : oFeatureTarget.GetFieldAsDoubleUnsafe(i),
8449 2 : oFeature.GetFieldAsInteger64Unsafe(i)))
8450 : {
8451 1 : bLossyConversion = true;
8452 : }
8453 12 : if (bLossyConversion)
8454 : {
8455 6 : CPLError(CE_Failure, CPLE_AppDefined,
8456 : "For feature " CPL_FRMT_GIB
8457 : ", value of field %s cannot not preserved",
8458 : oFeatureTarget.GetFID(),
8459 6 : poLayerDefnTmp->GetFieldDefn(i)->GetNameRef());
8460 6 : if (bTransactionOK)
8461 6 : RollbackTransaction();
8462 6 : return false;
8463 : }
8464 : }
8465 : }
8466 : }
8467 :
8468 60399 : const auto nInputFID = poFeatureTarget->GetFID();
8469 60399 : if (CreateFeature(poFeatureTarget) != OGRERR_NONE)
8470 : {
8471 1 : if (bTransactionOK)
8472 1 : RollbackTransaction();
8473 1 : return false;
8474 : }
8475 60398 : if (nInputFID != OGRNullFID)
8476 : {
8477 120587 : if (bWarningIfFIDNotPreserved &&
8478 : // cppcheck-suppress knownConditionTrueFalse
8479 60282 : poFeatureTarget->GetFID() != nInputFID)
8480 : {
8481 2 : CPLError(CE_Warning, CPLE_AppDefined,
8482 : "Feature id " CPL_FRMT_GIB " not preserved",
8483 : nInputFID);
8484 : }
8485 60304 : else if (bErrorIfFIDNotPreserved &&
8486 : // cppcheck-suppress knownConditionTrueFalse
8487 1 : poFeatureTarget->GetFID() != nInputFID)
8488 : {
8489 1 : CPLError(CE_Failure, CPLE_AppDefined,
8490 : "Feature id " CPL_FRMT_GIB " not preserved",
8491 : nInputFID);
8492 1 : if (bTransactionOK)
8493 1 : RollbackTransaction();
8494 1 : return false;
8495 : }
8496 : }
8497 :
8498 60397 : if (arrayFIDColumn)
8499 : {
8500 60309 : uint8_t *pabyValidity = static_cast<uint8_t *>(
8501 60309 : const_cast<void *>(arrayFIDColumn->buffers[0]));
8502 60309 : if (IsInt32(schemaFIDColumn->format))
8503 : {
8504 6 : auto *panValues = static_cast<int32_t *>(
8505 6 : const_cast<void *>(arrayFIDColumn->buffers[1]));
8506 6 : if (poFeatureTarget->GetFID() >
8507 6 : std::numeric_limits<int32_t>::max())
8508 : {
8509 0 : if (pabyValidity)
8510 : {
8511 0 : ++fidNullCount;
8512 0 : UnsetBit(pabyValidity,
8513 0 : static_cast<size_t>(iFeature +
8514 0 : arrayFIDColumn->offset));
8515 : }
8516 0 : CPLError(CE_Warning, CPLE_AppDefined,
8517 : "FID " CPL_FRMT_GIB
8518 : " cannot be stored in FID array of type int32",
8519 : poFeatureTarget->GetFID());
8520 : }
8521 : else
8522 : {
8523 6 : if (pabyValidity)
8524 : {
8525 5 : SetBit(pabyValidity,
8526 5 : static_cast<size_t>(iFeature +
8527 5 : arrayFIDColumn->offset));
8528 : }
8529 6 : panValues[iFeature + arrayFIDColumn->offset] =
8530 6 : static_cast<int32_t>(poFeatureTarget->GetFID());
8531 : }
8532 : }
8533 60303 : else if (IsInt64(schemaFIDColumn->format))
8534 : {
8535 60303 : if (pabyValidity)
8536 : {
8537 0 : SetBit(
8538 : pabyValidity,
8539 0 : static_cast<size_t>(iFeature + arrayFIDColumn->offset));
8540 : }
8541 60303 : auto *panValues = static_cast<int64_t *>(
8542 60303 : const_cast<void *>(arrayFIDColumn->buffers[1]));
8543 60303 : panValues[iFeature + arrayFIDColumn->offset] =
8544 60303 : poFeatureTarget->GetFID();
8545 : }
8546 : else
8547 : {
8548 0 : CPLAssert(false);
8549 : }
8550 : }
8551 : }
8552 77 : if (arrayFIDColumn && arrayFIDColumn->buffers[0])
8553 : {
8554 1 : arrayFIDColumn->null_count = fidNullCount;
8555 : }
8556 :
8557 77 : bool bRet = true;
8558 77 : if (bTransactionOK)
8559 66 : bRet = CommitTransaction() == OGRERR_NONE;
8560 :
8561 77 : return bRet;
8562 : }
8563 :
8564 : /************************************************************************/
8565 : /* OGR_L_WriteArrowBatch() */
8566 : /************************************************************************/
8567 :
8568 : // clang-format off
8569 : /** Writes a batch of rows from an ArrowArray.
8570 : *
8571 : * This is semantically close to calling CreateFeature() with multiple features
8572 : * at once.
8573 : *
8574 : * The ArrowArray must be of type struct (format=+s), and its children generally
8575 : * map to a OGR attribute or geometry field (unless they are struct themselves).
8576 : *
8577 : * Method IsArrowSchemaSupported() can be called to determine if the schema
8578 : * will be supported by WriteArrowBatch().
8579 : *
8580 : * OGR fields for the corresponding children arrays must exist and be of a
8581 : * compatible type. For attribute fields, they should generally be created with
8582 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
8583 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
8584 : * they should be created either implicitly at CreateLayer() time
8585 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
8586 : *
8587 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
8588 : * implementation of WriteArrowBatch() for scenarios that involve appending to
8589 : * an already existing output layer when the input Arrow field type and the
8590 : * OGR layer field type are 32/64-bit integers or real numbers, but do not match
8591 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
8592 : * can be used to control the behavior in case of lossy conversion.
8593 : *
8594 : * Arrays for geometry columns should be of binary or large binary type and
8595 : * contain WKB geometry.
8596 : *
8597 : * Note that the passed array may be set to a released state
8598 : * (array->release==NULL) after this call (not by the base implementation,
8599 : * but in specialized ones such as Parquet or Arrow for example)
8600 : *
8601 : * Supported options of the base implementation are:
8602 : * <ul>
8603 : * <li>FID=name. Name of the FID column in the array. If not provided,
8604 : * GetFIDColumn() is used to determine it. The special name
8605 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
8606 : * GetFIDColumn() are set.
8607 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
8608 : * On input, values of the FID column are used to create the feature.
8609 : * On output, the values of the FID column may be set with the FID of the
8610 : * created feature (if the array is not released).
8611 : * </li>
8612 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
8613 : * input FID is not preserved in the output layer. The default is NOTHING.
8614 : * Setting it to ERROR will cause the function to error out. Setting it
8615 : * to WARNING will cause the function to emit a warning but continue its
8616 : * processing.
8617 : * </li>
8618 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
8619 : * Action to perform when the input field value is not preserved in the
8620 : * output layer.
8621 : * The default is WARNING, which will cause the function to emit a warning
8622 : * but continue its processing.
8623 : * Setting it to ERROR will cause the function to error out if a lossy
8624 : * conversion is detected.
8625 : * </li>
8626 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
8627 : * GetGeometryColumn() is used. The special name
8628 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
8629 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
8630 : * Geometry columns are also identified if they have
8631 : * ARROW:extension:name=ogc.wkb as a field metadata.
8632 : * The corresponding ArrowArray must be of type binary (w) or large
8633 : * binary (W).
8634 : * </li>
8635 : * </ul>
8636 : *
8637 : * The following example demonstrates how to copy a layer from one format to
8638 : * another one (assuming it has at most a single geometry column):
8639 : \code{.py}
8640 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
8641 : stream = src_lyr.GetArrowStream()
8642 : schema = stream.GetSchema()
8643 :
8644 : # If the source layer has a FID column and the output driver supports
8645 : # a FID layer creation option, set it to the source FID column name.
8646 : if src_lyr.GetFIDColumn():
8647 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
8648 : "DS_LAYER_CREATIONOPTIONLIST"
8649 : )
8650 : if creationOptions and '"FID"' in creationOptions:
8651 : lcos["FID"] = src_lyr.GetFIDColumn()
8652 :
8653 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
8654 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
8655 : out_lyr = out_ds.CreateLayer(
8656 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
8657 : )
8658 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
8659 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
8660 : else:
8661 : out_lyr = out_ds.CreateLayer(
8662 : src_lyr.GetName(),
8663 : geom_type=src_lyr.GetGeomType(),
8664 : srs=src_lyr.GetSpatialRef(),
8665 : options=lcos,
8666 : )
8667 :
8668 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
8669 : assert success, error_msg
8670 :
8671 : src_geom_field_names = [
8672 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
8673 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
8674 : ]
8675 : for i in range(schema.GetChildrenCount()):
8676 : # GetArrowStream() may return "OGC_FID" for a unnamed source FID
8677 : # column and "wkb_geometry" for a unnamed source geometry column.
8678 : # Also test GetFIDColumn() and src_geom_field_names if they are
8679 : # named.
8680 : if (
8681 : schema.GetChild(i).GetName()
8682 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
8683 : and schema.GetChild(i).GetName() not in src_geom_field_names
8684 : ):
8685 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
8686 :
8687 : write_options = []
8688 : if src_lyr.GetFIDColumn():
8689 : write_options.append("FID=" + src_lyr.GetFIDColumn())
8690 : if (
8691 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
8692 : and src_lyr.GetGeometryColumn()
8693 : ):
8694 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
8695 :
8696 : while True:
8697 : array = stream.GetNextRecordBatch()
8698 : if array is None:
8699 : break
8700 : out_lyr.WriteArrowBatch(schema, array, write_options)
8701 : \endcode
8702 : *
8703 : * This method and CreateFeature() are mutually exclusive in the same session.
8704 : *
8705 : * This method is the same as the C++ method OGRLayer::WriteArrowBatch().
8706 : *
8707 : * @param hLayer Layer.
8708 : * @param schema Schema of array.
8709 : * @param array Array of type struct. It may be released (array->release==NULL)
8710 : * after calling this method.
8711 : * @param papszOptions Options. Null terminated list, or nullptr.
8712 : * @return true in case of success
8713 : * @since 3.8
8714 : */
8715 : // clang-format on
8716 :
8717 59 : bool OGR_L_WriteArrowBatch(OGRLayerH hLayer, const struct ArrowSchema *schema,
8718 : struct ArrowArray *array, CSLConstList papszOptions)
8719 : {
8720 59 : VALIDATE_POINTER1(hLayer, __func__, false);
8721 59 : VALIDATE_POINTER1(schema, __func__, false);
8722 59 : VALIDATE_POINTER1(array, __func__, false);
8723 :
8724 118 : return OGRLayer::FromHandle(hLayer)->WriteArrowBatch(schema, array,
8725 59 : papszOptions);
8726 : }
|