Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: OpenGIS Simple Features Reference Implementation
4 : * Purpose: Parts of OGRLayer dealing with Arrow C interface
5 : * Author: Even Rouault, <even dot rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022-2023, Even Rouault <even dot rouault at spatialys.com>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #include "ogrsf_frmts.h"
14 : #include "ogr_api.h"
15 : #include "ogr_recordbatch.h"
16 : #include "ograrrowarrayhelper.h"
17 : #include "ogrlayerarrow.h"
18 : #include "ogr_p.h"
19 : #include "ogr_swq.h"
20 : #include "ogr_wkb.h"
21 : #include "ogr_p.h"
22 : #include "ogrlayer_private.h"
23 :
24 : #include "cpl_float.h"
25 : #include "cpl_json.h"
26 : #include "cpl_time.h"
27 :
28 : #include <algorithm>
29 : #include <cassert>
30 : #include <cinttypes>
31 : #include <limits>
32 : #include <utility>
33 : #include <set>
34 :
// Keys of the ArrowSchema::metadata entries used to carry OGR field
// properties alongside an Arrow field.
constexpr const char *MD_GDAL_OGR_TYPE = "GDAL:OGR:type";
constexpr const char *MD_GDAL_OGR_ALTERNATIVE_NAME =
    "GDAL:OGR:alternative_name";
constexpr const char *MD_GDAL_OGR_COMMENT = "GDAL:OGR:comment";
constexpr const char *MD_GDAL_OGR_DEFAULT = "GDAL:OGR:default";
constexpr const char *MD_GDAL_OGR_SUBTYPE = "GDAL:OGR:subtype";
constexpr const char *MD_GDAL_OGR_WIDTH = "GDAL:OGR:width";
constexpr const char *MD_GDAL_OGR_UNIQUE = "GDAL:OGR:unique";
constexpr const char *MD_GDAL_OGR_DOMAIN_NAME = "GDAL:OGR:domain_name";

// Single-letter Arrow format strings of the primitive types.
constexpr char ARROW_LETTER_BOOLEAN = 'b';
constexpr char ARROW_LETTER_INT8 = 'c';
constexpr char ARROW_LETTER_UINT8 = 'C';
constexpr char ARROW_LETTER_INT16 = 's';
constexpr char ARROW_LETTER_UINT16 = 'S';
constexpr char ARROW_LETTER_INT32 = 'i';
constexpr char ARROW_LETTER_UINT32 = 'I';
constexpr char ARROW_LETTER_INT64 = 'l';
constexpr char ARROW_LETTER_UINT64 = 'L';
constexpr char ARROW_LETTER_FLOAT16 = 'e';
constexpr char ARROW_LETTER_FLOAT32 = 'f';
constexpr char ARROW_LETTER_FLOAT64 = 'g';
constexpr char ARROW_LETTER_STRING = 'u';
constexpr char ARROW_LETTER_LARGE_STRING = 'U';
constexpr char ARROW_LETTER_BINARY = 'z';
constexpr char ARROW_LETTER_LARGE_BINARY = 'Z';
// First letter of the "d:precision,scale" decimal format.
constexpr char ARROW_LETTER_DECIMAL = 'd';
// Second letter (after '+') of the list and large list formats.
constexpr char ARROW_2ND_LETTER_LIST = 'l';
constexpr char ARROW_2ND_LETTER_LARGE_LIST = 'L';
64 :
/** Return true if format is exactly "+s" (Arrow struct). */
static inline bool IsStructure(const char *format)
{
    return strcmp(format, "+s") == 0;
}
69 :
/** Return true if format is exactly "+m" (Arrow map). */
static inline bool IsMap(const char *format)
{
    return strcmp(format, "+m") == 0;
}
74 :
/** Return true if format starts with "w:" (fixed-width binary). */
static inline bool IsFixedWidthBinary(const char *format)
{
    if (format[0] != 'w')
        return false;
    return format[1] == ':';
}
79 :
/** Return the byte width encoded after the "w:" prefix of format.
 *
 * The caller is expected to have checked the prefix with
 * IsFixedWidthBinary(); the digits are parsed with atoi().
 */
static inline int GetFixedWithBinary(const char *format)
{
    constexpr size_t nPrefixLen = sizeof("w:") - 1;
    const char *pszWidth = format + nPrefixLen;
    return atoi(pszWidth);
}
84 :
85 30313 : static inline bool IsList(const char *format)
86 : {
87 36434 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LIST &&
88 36434 : format[2] == 0;
89 : }
90 :
91 20226 : static inline bool IsLargeList(const char *format)
92 : {
93 20350 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LARGE_LIST &&
94 20350 : format[2] == 0;
95 : }
96 :
/** Return true if format starts with "+w:" (fixed-size list). */
static inline bool IsFixedSizeList(const char *format)
{
    return strncmp(format, "+w:", 3) == 0;
}
101 :
/** Return the item count encoded after the "+w:" prefix of format.
 *
 * The caller is expected to have checked the prefix with IsFixedSizeList();
 * the digits are parsed with atoi().
 */
static inline int GetFixedSizeList(const char *format)
{
    constexpr size_t nPrefixLen = sizeof("+w:") - 1;
    const char *pszCount = format + nPrefixLen;
    return atoi(pszCount);
}
106 :
107 2774 : static inline bool IsDecimal(const char *format)
108 : {
109 2774 : return format[0] == ARROW_LETTER_DECIMAL && format[1] == ':';
110 : }
111 :
112 1341150 : static inline bool IsBoolean(const char *format)
113 : {
114 1341150 : return format[0] == ARROW_LETTER_BOOLEAN && format[1] == 0;
115 : }
116 :
117 1337900 : static inline bool IsInt8(const char *format)
118 : {
119 1337900 : return format[0] == ARROW_LETTER_INT8 && format[1] == 0;
120 : }
121 :
122 1338020 : static inline bool IsUInt8(const char *format)
123 : {
124 1338020 : return format[0] == ARROW_LETTER_UINT8 && format[1] == 0;
125 : }
126 :
127 1336600 : static inline bool IsInt16(const char *format)
128 : {
129 1336600 : return format[0] == ARROW_LETTER_INT16 && format[1] == 0;
130 : }
131 :
132 1336710 : static inline bool IsUInt16(const char *format)
133 : {
134 1336710 : return format[0] == ARROW_LETTER_UINT16 && format[1] == 0;
135 : }
136 :
137 1395720 : static inline bool IsInt32(const char *format)
138 : {
139 1395720 : return format[0] == ARROW_LETTER_INT32 && format[1] == 0;
140 : }
141 :
142 1335290 : static inline bool IsUInt32(const char *format)
143 : {
144 1335290 : return format[0] == ARROW_LETTER_UINT32 && format[1] == 0;
145 : }
146 :
147 1388640 : static inline bool IsInt64(const char *format)
148 : {
149 1388640 : return format[0] == ARROW_LETTER_INT64 && format[1] == 0;
150 : }
151 :
152 1268040 : static inline bool IsUInt64(const char *format)
153 : {
154 1268040 : return format[0] == ARROW_LETTER_UINT64 && format[1] == 0;
155 : }
156 :
157 14922 : static inline bool IsFloat16(const char *format)
158 : {
159 14922 : return format[0] == ARROW_LETTER_FLOAT16 && format[1] == 0;
160 : }
161 :
162 1274450 : static inline bool IsFloat32(const char *format)
163 : {
164 1274450 : return format[0] == ARROW_LETTER_FLOAT32 && format[1] == 0;
165 : }
166 :
167 1266110 : static inline bool IsFloat64(const char *format)
168 : {
169 1266110 : return format[0] == ARROW_LETTER_FLOAT64 && format[1] == 0;
170 : }
171 :
172 2484820 : static inline bool IsString(const char *format)
173 : {
174 2484820 : return format[0] == ARROW_LETTER_STRING && format[1] == 0;
175 : }
176 :
177 73719 : static inline bool IsLargeString(const char *format)
178 : {
179 73719 : return format[0] == ARROW_LETTER_LARGE_STRING && format[1] == 0;
180 : }
181 :
182 78865 : static inline bool IsBinary(const char *format)
183 : {
184 78865 : return format[0] == ARROW_LETTER_BINARY && format[1] == 0;
185 : }
186 :
187 12856 : static inline bool IsLargeBinary(const char *format)
188 : {
189 12856 : return format[0] == ARROW_LETTER_LARGE_BINARY && format[1] == 0;
190 : }
191 :
/** Return true if format starts with "ts", then chType, then ':'.
 *
 * @param format NUL-terminated format string.
 * @param chType Expected third character (unit letter).
 */
static inline bool IsTimestampInternal(const char *format, char chType)
{
    // Characters are tested left to right so that nothing is read past the
    // terminating NUL of a short string.
    if (format[0] != 't')
        return false;
    if (format[1] != 's')
        return false;
    if (format[2] != chType)
        return false;
    return format[3] == ':';
}
197 :
198 3542 : static inline bool IsTimestampSeconds(const char *format)
199 : {
200 3542 : return IsTimestampInternal(format, 's');
201 : }
202 :
203 3532 : static inline bool IsTimestampMilliseconds(const char *format)
204 : {
205 3532 : return IsTimestampInternal(format, 'm');
206 : }
207 :
208 2387 : static inline bool IsTimestampMicroseconds(const char *format)
209 : {
210 2387 : return IsTimestampInternal(format, 'u');
211 : }
212 :
213 1835 : static inline bool IsTimestampNanoseconds(const char *format)
214 : {
215 1835 : return IsTimestampInternal(format, 'n');
216 : }
217 :
218 2784 : static inline bool IsTimestamp(const char *format)
219 : {
220 7260 : return IsTimestampSeconds(format) || IsTimestampMilliseconds(format) ||
221 7260 : IsTimestampMicroseconds(format) || IsTimestampNanoseconds(format);
222 : }
223 :
224 107 : static inline const char *GetTimestampTimezone(const char *format)
225 : {
226 107 : return IsTimestamp(format) ? format + strlen("tm?:") : "";
227 : }
228 :
229 : /************************************************************************/
230 : /* TestBit() */
231 : /************************************************************************/
232 :
/** Return whether bit nIdx is set in pabyData, bit 0 being the least
 * significant bit of the first byte.
 */
inline bool TestBit(const uint8_t *pabyData, size_t nIdx)
{
    const size_t nByte = nIdx / 8;
    const unsigned nMask = 1U << (nIdx % 8);
    return (pabyData[nByte] & nMask) != 0;
}
237 :
238 : /************************************************************************/
239 : /* SetBit() */
240 : /************************************************************************/
241 :
/** Set bit nIdx in pabyData, bit 0 being the least significant bit of the
 * first byte. Other bits are left untouched.
 */
inline void SetBit(uint8_t *pabyData, size_t nIdx)
{
    const size_t nByte = nIdx / 8;
    const unsigned nMask = 1U << (nIdx % 8);
    pabyData[nByte] = static_cast<uint8_t>(pabyData[nByte] | nMask);
}
246 :
247 : /************************************************************************/
248 : /* UnsetBit() */
249 : /************************************************************************/
250 :
/** Clear bit nIdx in pabyData, bit 0 being the least significant bit of the
 * first byte. Other bits are left untouched.
 */
inline void UnsetBit(uint8_t *pabyData, size_t nIdx)
{
    const size_t nByte = nIdx / 8;
    const unsigned nMask = 1U << (nIdx % 8);
    pabyData[nByte] = static_cast<uint8_t>(pabyData[nByte] & ~nMask);
}
255 :
256 : /************************************************************************/
257 : /* DefaultReleaseSchema() */
258 : /************************************************************************/
259 :
260 25355 : static void OGRLayerReleaseSchema(struct ArrowSchema *schema,
261 : bool bFullFreeFormat)
262 : {
263 25355 : CPLAssert(schema->release != nullptr);
264 25355 : if (bFullFreeFormat || STARTS_WITH(schema->format, "w:") ||
265 25323 : STARTS_WITH(schema->format, "tsm:"))
266 : {
267 1033 : CPLFree(const_cast<char *>(schema->format));
268 : }
269 25355 : CPLFree(const_cast<char *>(schema->name));
270 25355 : CPLFree(const_cast<char *>(schema->metadata));
271 25355 : if (schema->children)
272 : {
273 26003 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
274 : {
275 22795 : if (schema->children[i] && schema->children[i]->release)
276 : {
277 22795 : schema->children[i]->release(schema->children[i]);
278 22795 : CPLFree(schema->children[i]);
279 : }
280 : }
281 3208 : CPLFree(schema->children);
282 : }
283 25355 : if (schema->dictionary)
284 : {
285 32 : if (schema->dictionary->release)
286 : {
287 32 : schema->dictionary->release(schema->dictionary);
288 32 : CPLFree(schema->dictionary);
289 : }
290 : }
291 25355 : schema->release = nullptr;
292 25355 : }
293 :
294 25332 : static void OGRLayerPartialReleaseSchema(struct ArrowSchema *schema)
295 : {
296 25332 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ false);
297 25332 : }
298 :
299 23 : static void OGRLayerFullReleaseSchema(struct ArrowSchema *schema)
300 : {
301 23 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ true);
302 23 : }
303 :
304 : /** Release a ArrowSchema.
305 : *
306 : * To be used by driver implementations that have a custom GetArrowStream()
307 : * implementation.
308 : *
309 : * @param schema Schema to release.
310 : * @since GDAL 3.6
311 : */
312 :
313 25300 : void OGRLayer::ReleaseSchema(struct ArrowSchema *schema)
314 : {
315 25300 : OGRLayerPartialReleaseSchema(schema);
316 25300 : }
317 :
318 : /************************************************************************/
319 : /* AddDictToSchema() */
320 : /************************************************************************/
321 :
322 32 : static void AddDictToSchema(struct ArrowSchema *psChild,
323 : const OGRCodedFieldDomain *poCodedDomain)
324 : {
325 32 : const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
326 32 : int nLastCode = -1;
327 32 : int nCountNull = 0;
328 32 : uint32_t nCountChars = 0;
329 112 : for (; psIter->pszCode; ++psIter)
330 : {
331 80 : if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
332 : {
333 0 : return;
334 : }
335 80 : int nCode = atoi(psIter->pszCode);
336 80 : if (nCode <= nLastCode || nCode - nLastCode > 100)
337 : {
338 0 : return;
339 : }
340 106 : for (int i = nLastCode + 1; i < nCode; ++i)
341 : {
342 26 : nCountNull++;
343 : }
344 80 : if (psIter->pszValue != nullptr)
345 : {
346 54 : const size_t nLen = strlen(psIter->pszValue);
347 54 : if (nLen > std::numeric_limits<uint32_t>::max() - nCountChars)
348 0 : return;
349 54 : nCountChars += static_cast<uint32_t>(nLen);
350 : }
351 : else
352 26 : nCountNull++;
353 80 : nLastCode = nCode;
354 : }
355 :
356 : auto psChildDict = static_cast<struct ArrowSchema *>(
357 32 : CPLCalloc(1, sizeof(struct ArrowSchema)));
358 32 : psChild->dictionary = psChildDict;
359 32 : psChildDict->release = OGRLayerPartialReleaseSchema;
360 32 : psChildDict->name = CPLStrdup(poCodedDomain->GetName().c_str());
361 32 : psChildDict->format = "u";
362 32 : if (nCountNull)
363 26 : psChildDict->flags = ARROW_FLAG_NULLABLE;
364 : }
365 :
366 : /************************************************************************/
367 : /* DefaultGetArrowSchema() */
368 : /************************************************************************/
369 :
370 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
371 : *
372 : * To be used by driver implementations that have a custom GetArrowStream()
373 : * implementation.
374 : *
375 : * @since GDAL 3.6
376 : */
377 2194 : int OGRLayer::GetArrowSchema(struct ArrowArrayStream *,
378 : struct ArrowSchema *out_schema)
379 : {
380 2194 : const bool bIncludeFID = CPLTestBool(
381 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
382 2194 : const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
383 : GAS_OPT_DATETIME_AS_STRING, false);
384 2194 : memset(out_schema, 0, sizeof(*out_schema));
385 2194 : out_schema->format = "+s";
386 2194 : out_schema->name = CPLStrdup("");
387 2194 : out_schema->metadata = nullptr;
388 2194 : auto poLayerDefn = GetLayerDefn();
389 2194 : const int nFieldCount = poLayerDefn->GetFieldCount();
390 2194 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
391 2194 : const int nChildren = 1 + nFieldCount + nGeomFieldCount;
392 :
393 2194 : out_schema->children = static_cast<struct ArrowSchema **>(
394 2194 : CPLCalloc(nChildren, sizeof(struct ArrowSchema *)));
395 2194 : int iSchemaChild = 0;
396 2194 : if (bIncludeFID)
397 : {
398 3884 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
399 1942 : CPLCalloc(1, sizeof(struct ArrowSchema)));
400 1942 : auto psChild = out_schema->children[iSchemaChild];
401 1942 : ++iSchemaChild;
402 1942 : psChild->release = OGRLayer::ReleaseSchema;
403 1942 : const char *pszFIDName = GetFIDColumn();
404 1942 : psChild->name =
405 1942 : CPLStrdup((pszFIDName && pszFIDName[0]) ? pszFIDName
406 : : DEFAULT_ARROW_FID_NAME);
407 1942 : psChild->format = "l";
408 : }
409 20168 : for (int i = 0; i < nFieldCount; ++i)
410 : {
411 17974 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
412 17974 : if (poFieldDefn->IsIgnored())
413 : {
414 40 : continue;
415 : }
416 :
417 35868 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
418 17934 : CPLCalloc(1, sizeof(struct ArrowSchema)));
419 17934 : auto psChild = out_schema->children[iSchemaChild];
420 17934 : ++iSchemaChild;
421 17934 : psChild->release = OGRLayer::ReleaseSchema;
422 17934 : psChild->name = CPLStrdup(poFieldDefn->GetNameRef());
423 17934 : if (poFieldDefn->IsNullable())
424 17148 : psChild->flags = ARROW_FLAG_NULLABLE;
425 17934 : const auto eType = poFieldDefn->GetType();
426 17934 : const auto eSubType = poFieldDefn->GetSubType();
427 17934 : const char *item_format = nullptr;
428 17934 : switch (eType)
429 : {
430 5867 : case OFTInteger:
431 : {
432 5867 : if (eSubType == OFSTBoolean)
433 286 : psChild->format = "b";
434 5581 : else if (eSubType == OFSTInt16)
435 673 : psChild->format = "s";
436 : else
437 4908 : psChild->format = "i";
438 :
439 5867 : const auto &osDomainName = poFieldDefn->GetDomainName();
440 5867 : if (!osDomainName.empty())
441 : {
442 32 : auto poDS = GetDataset();
443 32 : if (poDS)
444 : {
445 : const auto poFieldDomain =
446 32 : poDS->GetFieldDomain(osDomainName);
447 64 : if (poFieldDomain &&
448 32 : poFieldDomain->GetDomainType() == OFDT_CODED)
449 : {
450 32 : const OGRCodedFieldDomain *poCodedDomain =
451 : static_cast<const OGRCodedFieldDomain *>(
452 : poFieldDomain);
453 32 : AddDictToSchema(psChild, poCodedDomain);
454 : }
455 : }
456 : }
457 :
458 5867 : break;
459 : }
460 :
461 519 : case OFTInteger64:
462 519 : psChild->format = "l";
463 519 : break;
464 :
465 2842 : case OFTReal:
466 : {
467 2842 : if (eSubType == OFSTFloat32)
468 676 : psChild->format = "f";
469 : else
470 2166 : psChild->format = "g";
471 2842 : break;
472 : }
473 :
474 5142 : case OFTString:
475 : case OFTWideString:
476 5142 : psChild->format = "u";
477 5142 : break;
478 :
479 1209 : case OFTBinary:
480 : {
481 1209 : if (poFieldDefn->GetWidth() > 0)
482 9 : psChild->format =
483 9 : CPLStrdup(CPLSPrintf("w:%d", poFieldDefn->GetWidth()));
484 : else
485 1200 : psChild->format = "z";
486 1209 : break;
487 : }
488 :
489 383 : case OFTIntegerList:
490 : {
491 383 : if (eSubType == OFSTBoolean)
492 92 : item_format = "b";
493 291 : else if (eSubType == OFSTInt16)
494 67 : item_format = "s";
495 : else
496 224 : item_format = "i";
497 383 : break;
498 : }
499 :
500 97 : case OFTInteger64List:
501 97 : item_format = "l";
502 97 : break;
503 :
504 256 : case OFTRealList:
505 : {
506 256 : if (eSubType == OFSTFloat32)
507 84 : item_format = "f";
508 : else
509 172 : item_format = "g";
510 256 : break;
511 : }
512 :
513 273 : case OFTStringList:
514 : case OFTWideStringList:
515 273 : item_format = "u";
516 273 : break;
517 :
518 210 : case OFTDate:
519 210 : psChild->format = "tdD";
520 210 : break;
521 :
522 118 : case OFTTime:
523 118 : psChild->format = "ttm";
524 118 : break;
525 :
526 1018 : case OFTDateTime:
527 : {
528 1018 : const char *pszPrefix = "tsm:";
529 : const char *pszTZOverride =
530 1018 : m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
531 1018 : if (bDateTimeAsString)
532 : {
533 17 : psChild->format = "u";
534 : }
535 1001 : else if (pszTZOverride && EQUAL(pszTZOverride, "unknown"))
536 : {
537 2 : psChild->format = CPLStrdup(pszPrefix);
538 : }
539 999 : else if (pszTZOverride)
540 : {
541 40 : psChild->format = CPLStrdup(
542 80 : (std::string(pszPrefix) + pszTZOverride).c_str());
543 : }
544 : else
545 : {
546 959 : const int nTZFlag = poFieldDefn->GetTZFlag();
547 959 : if (nTZFlag == OGR_TZFLAG_MIXED_TZ ||
548 : nTZFlag == OGR_TZFLAG_UTC)
549 : {
550 7 : psChild->format =
551 7 : CPLStrdup(CPLSPrintf("%sUTC", pszPrefix));
552 : }
553 952 : else if (nTZFlag == OGR_TZFLAG_UNKNOWN ||
554 : nTZFlag == OGR_TZFLAG_LOCALTIME)
555 : {
556 936 : psChild->format = CPLStrdup(pszPrefix);
557 : }
558 : else
559 : {
560 16 : psChild->format = CPLStrdup(
561 32 : (pszPrefix + OGRTZFlagToTimezone(nTZFlag, "UTC"))
562 : .c_str());
563 : }
564 : }
565 1018 : break;
566 : }
567 : }
568 :
569 17934 : if (item_format)
570 : {
571 1009 : psChild->format = "+l";
572 1009 : psChild->n_children = 1;
573 1009 : psChild->children = static_cast<struct ArrowSchema **>(
574 1009 : CPLCalloc(1, sizeof(struct ArrowSchema *)));
575 2018 : psChild->children[0] = static_cast<struct ArrowSchema *>(
576 1009 : CPLCalloc(1, sizeof(struct ArrowSchema)));
577 1009 : psChild->children[0]->release = OGRLayer::ReleaseSchema;
578 1009 : psChild->children[0]->name = CPLStrdup("item");
579 1009 : psChild->children[0]->format = item_format;
580 : }
581 :
582 35868 : std::vector<std::pair<std::string, std::string>> oMetadata;
583 :
584 17934 : if (eType == OFTDateTime && bDateTimeAsString)
585 : {
586 : oMetadata.emplace_back(
587 17 : std::pair(MD_GDAL_OGR_TYPE, OGR_GetFieldTypeName(eType)));
588 : }
589 :
590 17934 : const char *pszAlternativeName = poFieldDefn->GetAlternativeNameRef();
591 17934 : if (pszAlternativeName && pszAlternativeName[0])
592 : oMetadata.emplace_back(
593 262 : std::pair(MD_GDAL_OGR_ALTERNATIVE_NAME, pszAlternativeName));
594 :
595 17934 : const char *pszDefault = poFieldDefn->GetDefault();
596 17934 : if (pszDefault && pszDefault[0])
597 42 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DEFAULT, pszDefault));
598 :
599 17934 : const std::string &osComment = poFieldDefn->GetComment();
600 17934 : if (!osComment.empty())
601 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_COMMENT, osComment));
602 :
603 17934 : if (eType == OFTString && eSubType == OFSTJSON)
604 : {
605 130 : oMetadata.emplace_back(
606 130 : std::pair(ARROW_EXTENSION_NAME_KEY, EXTENSION_NAME_ARROW_JSON));
607 : }
608 17804 : else if (eSubType != OFSTNone && eSubType != OFSTBoolean &&
609 : eSubType != OFSTFloat32)
610 : {
611 0 : oMetadata.emplace_back(std::pair(
612 741 : MD_GDAL_OGR_SUBTYPE, OGR_GetFieldSubTypeName(eSubType)));
613 : }
614 17934 : if (eType == OFTString && poFieldDefn->GetWidth() > 0)
615 : {
616 0 : oMetadata.emplace_back(std::pair(
617 661 : MD_GDAL_OGR_WIDTH, CPLSPrintf("%d", poFieldDefn->GetWidth())));
618 : }
619 17934 : if (poFieldDefn->IsUnique())
620 : {
621 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_UNIQUE, "true"));
622 : }
623 17934 : if (!poFieldDefn->GetDomainName().empty())
624 : {
625 64 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DOMAIN_NAME,
626 64 : poFieldDefn->GetDomainName()));
627 : }
628 :
629 17934 : if (!oMetadata.empty())
630 : {
631 1875 : uint64_t nLen64 = sizeof(int32_t);
632 3780 : for (const auto &oPair : oMetadata)
633 : {
634 1905 : nLen64 += sizeof(int32_t);
635 1905 : nLen64 += oPair.first.size();
636 1905 : nLen64 += sizeof(int32_t);
637 1905 : nLen64 += oPair.second.size();
638 : }
639 1875 : if (nLen64 <
640 1875 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
641 : {
642 1875 : const size_t nLen = static_cast<size_t>(nLen64);
643 1875 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
644 1875 : psChild->metadata = pszMetadata;
645 1875 : size_t offsetMD = 0;
646 1875 : int32_t nSize = static_cast<int>(oMetadata.size());
647 1875 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
648 1875 : offsetMD += sizeof(int32_t);
649 3780 : for (const auto &oPair : oMetadata)
650 : {
651 1905 : nSize = static_cast<int32_t>(oPair.first.size());
652 1905 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
653 1905 : offsetMD += sizeof(int32_t);
654 1905 : memcpy(pszMetadata + offsetMD, oPair.first.data(),
655 : oPair.first.size());
656 1905 : offsetMD += oPair.first.size();
657 :
658 1905 : nSize = static_cast<int32_t>(oPair.second.size());
659 1905 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
660 1905 : offsetMD += sizeof(int32_t);
661 1905 : memcpy(pszMetadata + offsetMD, oPair.second.data(),
662 : oPair.second.size());
663 1905 : offsetMD += oPair.second.size();
664 : }
665 :
666 1875 : CPLAssert(offsetMD == nLen);
667 1875 : CPL_IGNORE_RET_VAL(offsetMD);
668 : }
669 : else
670 : {
671 : // Extremely unlikely !
672 0 : CPLError(CE_Warning, CPLE_AppDefined,
673 : "Cannot write ArrowSchema::metadata due to "
674 : "too large content");
675 : }
676 : }
677 : }
678 :
679 : const char *const pszGeometryMetadataEncoding =
680 2194 : m_aosArrowArrayStreamOptions.FetchNameValue(
681 : "GEOMETRY_METADATA_ENCODING");
682 2194 : const char *pszExtensionName = EXTENSION_NAME_OGC_WKB;
683 2194 : if (pszGeometryMetadataEncoding)
684 : {
685 4 : if (EQUAL(pszGeometryMetadataEncoding, "OGC"))
686 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
687 4 : else if (EQUAL(pszGeometryMetadataEncoding, "GEOARROW"))
688 4 : pszExtensionName = EXTENSION_NAME_GEOARROW_WKB;
689 : else
690 0 : CPLError(CE_Warning, CPLE_NotSupported,
691 : "Unsupported GEOMETRY_METADATA_ENCODING value: %s",
692 : pszGeometryMetadataEncoding);
693 : }
694 4101 : for (int i = 0; i < nGeomFieldCount; ++i)
695 : {
696 1907 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
697 1907 : if (poFieldDefn->IsIgnored())
698 : {
699 15 : continue;
700 : }
701 :
702 1892 : out_schema->children[iSchemaChild] = CreateSchemaForWKBGeometryColumn(
703 : poFieldDefn, "z", pszExtensionName);
704 :
705 1892 : ++iSchemaChild;
706 : }
707 :
708 2194 : out_schema->n_children = iSchemaChild;
709 2194 : out_schema->release = OGRLayer::ReleaseSchema;
710 2194 : return 0;
711 : }
712 :
713 : /************************************************************************/
714 : /* CreateSchemaForWKBGeometryColumn() */
715 : /************************************************************************/
716 :
717 : /** Return a ArrowSchema* corresponding to the WKB encoding of a geometry
718 : * column.
719 : */
720 :
721 : /* static */
722 : struct ArrowSchema *
723 2221 : OGRLayer::CreateSchemaForWKBGeometryColumn(const OGRGeomFieldDefn *poFieldDefn,
724 : const char *pszArrowFormat,
725 : const char *pszExtensionName)
726 : {
727 2221 : CPLAssert(strcmp(pszArrowFormat, "z") == 0 ||
728 : strcmp(pszArrowFormat, "Z") == 0);
729 2221 : if (!EQUAL(pszExtensionName, EXTENSION_NAME_OGC_WKB) &&
730 4 : !EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
731 : {
732 0 : CPLError(CE_Failure, CPLE_NotSupported,
733 : "Unsupported extension name '%s'. Defaulting to '%s'",
734 : pszExtensionName, EXTENSION_NAME_OGC_WKB);
735 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
736 : }
737 : auto psSchema = static_cast<struct ArrowSchema *>(
738 2221 : CPLCalloc(1, sizeof(struct ArrowSchema)));
739 2221 : psSchema->release = OGRLayer::ReleaseSchema;
740 2221 : const char *pszGeomFieldName = poFieldDefn->GetNameRef();
741 2221 : if (pszGeomFieldName[0] == '\0')
742 765 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
743 2221 : psSchema->name = CPLStrdup(pszGeomFieldName);
744 2221 : if (poFieldDefn->IsNullable())
745 2192 : psSchema->flags = ARROW_FLAG_NULLABLE;
746 2221 : psSchema->format = strcmp(pszArrowFormat, "z") == 0 ? "z" : "Z";
747 2221 : std::string osExtensionMetadata;
748 2221 : if (EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
749 : {
750 4 : const auto poSRS = poFieldDefn->GetSpatialRef();
751 4 : if (poSRS)
752 : {
753 2 : char *pszPROJJSON = nullptr;
754 2 : poSRS->exportToPROJJSON(&pszPROJJSON, nullptr);
755 2 : if (pszPROJJSON)
756 : {
757 2 : osExtensionMetadata = "{\"crs\":";
758 2 : osExtensionMetadata += pszPROJJSON;
759 2 : osExtensionMetadata += '}';
760 2 : CPLFree(pszPROJJSON);
761 : }
762 : else
763 : {
764 0 : CPLError(CE_Warning, CPLE_AppDefined,
765 : "Cannot export CRS of geometry field %s to PROJJSON",
766 : poFieldDefn->GetNameRef());
767 : }
768 : }
769 : }
770 2221 : size_t nLen = sizeof(int32_t) + sizeof(int32_t) +
771 : strlen(ARROW_EXTENSION_NAME_KEY) + sizeof(int32_t) +
772 2221 : strlen(pszExtensionName);
773 2221 : if (!osExtensionMetadata.empty())
774 : {
775 2 : nLen += sizeof(int32_t) + strlen(ARROW_EXTENSION_METADATA_KEY) +
776 2 : sizeof(int32_t) + osExtensionMetadata.size();
777 : }
778 2221 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
779 2221 : psSchema->metadata = pszMetadata;
780 2221 : size_t offsetMD = 0;
781 2221 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
782 2221 : osExtensionMetadata.empty() ? 1 : 2;
783 2221 : offsetMD += sizeof(int32_t);
784 2221 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
785 : static_cast<int32_t>(strlen(ARROW_EXTENSION_NAME_KEY));
786 2221 : offsetMD += sizeof(int32_t);
787 2221 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_NAME_KEY,
788 : strlen(ARROW_EXTENSION_NAME_KEY));
789 2221 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_NAME_KEY));
790 2221 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
791 2221 : static_cast<int32_t>(strlen(pszExtensionName));
792 2221 : offsetMD += sizeof(int32_t);
793 2221 : memcpy(pszMetadata + offsetMD, pszExtensionName, strlen(pszExtensionName));
794 2221 : offsetMD += strlen(pszExtensionName);
795 2221 : if (!osExtensionMetadata.empty())
796 : {
797 2 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
798 : static_cast<int32_t>(strlen(ARROW_EXTENSION_METADATA_KEY));
799 2 : offsetMD += sizeof(int32_t);
800 2 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_METADATA_KEY,
801 : strlen(ARROW_EXTENSION_METADATA_KEY));
802 2 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_METADATA_KEY));
803 2 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
804 2 : static_cast<int32_t>(osExtensionMetadata.size());
805 2 : offsetMD += sizeof(int32_t);
806 2 : memcpy(pszMetadata + offsetMD, osExtensionMetadata.c_str(),
807 : osExtensionMetadata.size());
808 2 : offsetMD += osExtensionMetadata.size();
809 : }
810 2221 : CPLAssert(offsetMD == nLen);
811 2221 : CPL_IGNORE_RET_VAL(offsetMD);
812 4442 : return psSchema;
813 : }
814 :
815 : /************************************************************************/
816 : /* StaticGetArrowSchema() */
817 : /************************************************************************/
818 :
819 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
820 : *
821 : * To be used by driver implementations that have a custom GetArrowStream()
822 : * implementation.
823 : *
824 : * @since GDAL 3.6
825 : */
826 2414 : int OGRLayer::StaticGetArrowSchema(struct ArrowArrayStream *stream,
827 : struct ArrowSchema *out_schema)
828 : {
829 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
830 2414 : stream->private_data)
831 2414 : ->poShared->m_poLayer;
832 2414 : if (poLayer == nullptr)
833 : {
834 1 : CPLError(CE_Failure, CPLE_NotSupported,
835 : "Calling get_schema() on a freed OGRLayer is not supported");
836 1 : return EINVAL;
837 : }
838 2413 : return poLayer->GetArrowSchema(stream, out_schema);
839 : }
840 :
841 : /************************************************************************/
842 : /* DefaultReleaseArray() */
843 : /************************************************************************/
844 :
845 34789 : static void OGRLayerDefaultReleaseArray(struct ArrowArray *array)
846 : {
847 34789 : if (array->buffers)
848 : {
849 109356 : for (int i = 0; i < static_cast<int>(array->n_buffers); ++i)
850 74567 : VSIFreeAligned(const_cast<void *>(array->buffers[i]));
851 34789 : CPLFree(array->buffers);
852 : }
853 34789 : if (array->children)
854 : {
855 40730 : for (int i = 0; i < static_cast<int>(array->n_children); ++i)
856 : {
857 33012 : if (array->children[i] && array->children[i]->release)
858 : {
859 32639 : array->children[i]->release(array->children[i]);
860 32639 : CPLFree(array->children[i]);
861 : }
862 : }
863 7718 : CPLFree(array->children);
864 : }
865 34789 : if (array->dictionary)
866 : {
867 148 : if (array->dictionary->release)
868 : {
869 148 : array->dictionary->release(array->dictionary);
870 148 : CPLFree(array->dictionary);
871 : }
872 : }
873 34789 : array->release = nullptr;
874 34789 : }
875 :
876 : /** Release a ArrowArray.
877 : *
878 : * To be used by driver implementations that have a custom GetArrowStream()
879 : * implementation.
880 : *
881 : * @param array Arrow array to release.
882 : * @since GDAL 3.6
883 : */
884 3913 : void OGRLayer::ReleaseArray(struct ArrowArray *array)
885 : {
886 3913 : OGRLayerDefaultReleaseArray(array);
887 3913 : }
888 :
889 : /************************************************************************/
890 : /* IsValidField() */
891 : /************************************************************************/
892 :
893 89170 : static inline bool IsValidField(const OGRField *psRawField)
894 : {
895 104302 : return (!(psRawField->Set.nMarker1 == OGRUnsetMarker &&
896 7566 : psRawField->Set.nMarker2 == OGRUnsetMarker &&
897 178340 : psRawField->Set.nMarker3 == OGRUnsetMarker) &&
898 81604 : !(psRawField->Set.nMarker1 == OGRNullMarker &&
899 3210 : psRawField->Set.nMarker2 == OGRNullMarker &&
900 92380 : psRawField->Set.nMarker3 == OGRNullMarker));
901 : }
902 :
903 : /************************************************************************/
904 : /* AllocValidityBitmap() */
905 : /************************************************************************/
906 :
907 3503 : static uint8_t *AllocValidityBitmap(size_t nSize)
908 : {
909 : auto pabyValidity = static_cast<uint8_t *>(
910 3503 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((1 + nSize + 7) / 8));
911 3503 : if (pabyValidity)
912 : {
913 : // All valid initially
914 3503 : memset(pabyValidity, 0xFF, (nSize + 7) / 8);
915 : }
916 3503 : return pabyValidity;
917 : }
918 :
919 : /************************************************************************/
920 : /* FillArray() */
921 : /************************************************************************/
922 :
923 : template <class T, typename TMember>
924 5853 : static bool FillArray(struct ArrowArray *psChild,
925 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
926 : const size_t nFeatureCountLimit, const bool bIsNullable,
927 : TMember member, const int i)
928 : {
929 5853 : psChild->n_buffers = 2;
930 5853 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
931 5853 : uint8_t *pabyValidity = nullptr;
932 : T *panValues = static_cast<T *>(
933 5853 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
934 5853 : if (panValues == nullptr)
935 0 : return false;
936 5853 : psChild->buffers[1] = panValues;
937 54299 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
938 : {
939 48446 : auto &poFeature = apoFeatures[iFeat];
940 48446 : const auto psRawField = poFeature->GetRawFieldRef(i);
941 48446 : if (IsValidField(psRawField))
942 : {
943 43531 : panValues[iFeat] = static_cast<T>((*psRawField).*member);
944 : }
945 4915 : else if (bIsNullable)
946 : {
947 4915 : panValues[iFeat] = 0;
948 4915 : ++psChild->null_count;
949 4915 : if (pabyValidity == nullptr)
950 : {
951 1232 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
952 1232 : psChild->buffers[0] = pabyValidity;
953 1232 : if (pabyValidity == nullptr)
954 0 : return false;
955 : }
956 4915 : UnsetBit(pabyValidity, iFeat);
957 : }
958 : else
959 : {
960 0 : panValues[iFeat] = 0;
961 : }
962 : }
963 5853 : return true;
964 : }
965 :
966 : /************************************************************************/
967 : /* FillBoolArray() */
968 : /************************************************************************/
969 :
970 : template <typename TMember>
971 138 : static bool FillBoolArray(struct ArrowArray *psChild,
972 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
973 : const size_t nFeatureCountLimit,
974 : const bool bIsNullable, TMember member, const int i)
975 : {
976 138 : psChild->n_buffers = 2;
977 138 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
978 138 : uint8_t *pabyValidity = nullptr;
979 : uint8_t *panValues = static_cast<uint8_t *>(
980 138 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 7 + 1) / 8));
981 138 : if (panValues == nullptr)
982 0 : return false;
983 138 : memset(panValues, 0, (nFeatureCountLimit + 7) / 8);
984 138 : psChild->buffers[1] = panValues;
985 601 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
986 : {
987 463 : auto &poFeature = apoFeatures[iFeat];
988 463 : const auto psRawField = poFeature->GetRawFieldRef(i);
989 463 : if (IsValidField(psRawField))
990 : {
991 405 : if ((*psRawField).*member)
992 81 : SetBit(panValues, iFeat);
993 : }
994 58 : else if (bIsNullable)
995 : {
996 58 : ++psChild->null_count;
997 58 : if (pabyValidity == nullptr)
998 : {
999 46 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1000 46 : psChild->buffers[0] = pabyValidity;
1001 46 : if (pabyValidity == nullptr)
1002 0 : return false;
1003 : }
1004 58 : UnsetBit(pabyValidity, iFeat);
1005 : }
1006 : }
1007 138 : return true;
1008 : }
1009 :
1010 : /************************************************************************/
1011 : /* FillListArray() */
1012 : /************************************************************************/
1013 :
1014 : struct GetFromIntegerList
1015 : {
1016 555 : static inline int getCount(const OGRField *psRawField)
1017 : {
1018 555 : return psRawField->IntegerList.nCount;
1019 : }
1020 :
1021 276 : static inline const int *getValues(const OGRField *psRawField)
1022 : {
1023 276 : return psRawField->IntegerList.paList;
1024 : }
1025 : };
1026 :
1027 : struct GetFromInteger64List
1028 : {
1029 242 : static inline int getCount(const OGRField *psRawField)
1030 : {
1031 242 : return psRawField->Integer64List.nCount;
1032 : }
1033 :
1034 120 : static inline const GIntBig *getValues(const OGRField *psRawField)
1035 : {
1036 120 : return psRawField->Integer64List.paList;
1037 : }
1038 : };
1039 :
1040 : struct GetFromRealList
1041 : {
1042 374 : static inline int getCount(const OGRField *psRawField)
1043 : {
1044 374 : return psRawField->RealList.nCount;
1045 : }
1046 :
1047 186 : static inline const double *getValues(const OGRField *psRawField)
1048 : {
1049 186 : return psRawField->RealList.paList;
1050 : }
1051 : };
1052 :
1053 : template <class OffsetType, class T, class GetFromList>
1054 : static size_t
1055 416 : FillListArray(struct ArrowArray *psChild,
1056 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1057 : const size_t nFeatureCountLimit, const bool bIsNullable,
1058 : const int i, const size_t nMemLimit)
1059 : {
1060 416 : psChild->n_buffers = 2;
1061 416 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1062 416 : uint8_t *pabyValidity = nullptr;
1063 : OffsetType *panOffsets =
1064 416 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1065 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1066 416 : if (panOffsets == nullptr)
1067 0 : return 0;
1068 416 : psChild->buffers[1] = panOffsets;
1069 :
1070 416 : OffsetType nOffset = 0;
1071 416 : size_t nFeatCount = 0;
1072 1445 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1073 : {
1074 1035 : panOffsets[iFeat] = nOffset;
1075 1035 : auto &poFeature = apoFeatures[iFeat];
1076 1035 : const auto psRawField = poFeature->GetRawFieldRef(i);
1077 1035 : if (IsValidField(psRawField))
1078 : {
1079 529 : const unsigned nCount = GetFromList::getCount(psRawField);
1080 529 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1081 : {
1082 6 : if (nFeatCount == 0)
1083 3 : return 0;
1084 3 : break;
1085 : }
1086 523 : nOffset += static_cast<OffsetType>(nCount);
1087 : }
1088 506 : else if (bIsNullable)
1089 : {
1090 506 : ++psChild->null_count;
1091 506 : if (pabyValidity == nullptr)
1092 : {
1093 231 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1094 231 : psChild->buffers[0] = pabyValidity;
1095 231 : if (pabyValidity == nullptr)
1096 0 : return 0;
1097 : }
1098 506 : UnsetBit(pabyValidity, iFeat);
1099 : }
1100 : }
1101 413 : panOffsets[nFeatCount] = nOffset;
1102 :
1103 413 : psChild->n_children = 1;
1104 413 : psChild->children = static_cast<struct ArrowArray **>(
1105 413 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1106 826 : psChild->children[0] = static_cast<struct ArrowArray *>(
1107 413 : CPLCalloc(1, sizeof(struct ArrowArray)));
1108 413 : auto psValueChild = psChild->children[0];
1109 :
1110 413 : psValueChild->release = OGRLayerDefaultReleaseArray;
1111 413 : psValueChild->n_buffers = 2;
1112 413 : psValueChild->buffers =
1113 413 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1114 413 : psValueChild->length = nOffset;
1115 : T *panValues = static_cast<T *>(
1116 413 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (nOffset + 1)));
1117 413 : if (panValues == nullptr)
1118 0 : return 0;
1119 413 : psValueChild->buffers[1] = panValues;
1120 :
1121 413 : nOffset = 0;
1122 1442 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1123 : {
1124 1029 : auto &poFeature = apoFeatures[iFeat];
1125 1029 : const auto psRawField = poFeature->GetRawFieldRef(i);
1126 1029 : if (IsValidField(psRawField))
1127 : {
1128 523 : const int nCount = GetFromList::getCount(psRawField);
1129 523 : const auto paList = GetFromList::getValues(psRawField);
1130 : if (sizeof(*paList) == sizeof(T))
1131 456 : memcpy(panValues + nOffset, paList, nCount * sizeof(T));
1132 : else
1133 : {
1134 203 : for (int j = 0; j < nCount; ++j)
1135 : {
1136 136 : panValues[nOffset + j] = static_cast<T>(paList[j]);
1137 : }
1138 : }
1139 523 : nOffset += static_cast<OffsetType>(nCount);
1140 : }
1141 : }
1142 :
1143 413 : return nFeatCount;
1144 : }
1145 :
1146 : template <class OffsetType, class GetFromList>
1147 : static size_t
1148 49 : FillListArrayBool(struct ArrowArray *psChild,
1149 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1150 : const size_t nFeatureCountLimit, const bool bIsNullable,
1151 : const int i, const size_t nMemLimit)
1152 : {
1153 49 : psChild->n_buffers = 2;
1154 49 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1155 49 : uint8_t *pabyValidity = nullptr;
1156 : OffsetType *panOffsets =
1157 49 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1158 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1159 49 : if (panOffsets == nullptr)
1160 0 : return 0;
1161 49 : psChild->buffers[1] = panOffsets;
1162 :
1163 49 : OffsetType nOffset = 0;
1164 49 : size_t nFeatCount = 0;
1165 138 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1166 : {
1167 91 : panOffsets[iFeat] = nOffset;
1168 91 : auto &poFeature = apoFeatures[iFeat];
1169 91 : const auto psRawField = poFeature->GetRawFieldRef(i);
1170 91 : if (IsValidField(psRawField))
1171 : {
1172 60 : const unsigned nCount = GetFromList::getCount(psRawField);
1173 60 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1174 : {
1175 2 : if (nFeatCount == 0)
1176 1 : return 0;
1177 1 : break;
1178 : }
1179 58 : nOffset += static_cast<OffsetType>(nCount);
1180 : }
1181 31 : else if (bIsNullable)
1182 : {
1183 31 : ++psChild->null_count;
1184 31 : if (pabyValidity == nullptr)
1185 : {
1186 27 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1187 27 : psChild->buffers[0] = pabyValidity;
1188 27 : if (pabyValidity == nullptr)
1189 0 : return 0;
1190 : }
1191 31 : UnsetBit(pabyValidity, iFeat);
1192 : }
1193 : }
1194 48 : panOffsets[nFeatCount] = nOffset;
1195 :
1196 48 : psChild->n_children = 1;
1197 48 : psChild->children = static_cast<struct ArrowArray **>(
1198 48 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1199 96 : psChild->children[0] = static_cast<struct ArrowArray *>(
1200 48 : CPLCalloc(1, sizeof(struct ArrowArray)));
1201 48 : auto psValueChild = psChild->children[0];
1202 :
1203 48 : psValueChild->release = OGRLayerDefaultReleaseArray;
1204 48 : psValueChild->n_buffers = 2;
1205 48 : psValueChild->buffers =
1206 48 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1207 48 : psValueChild->length = nOffset;
1208 : uint8_t *panValues = static_cast<uint8_t *>(
1209 48 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nOffset + 7 + 1) / 8));
1210 48 : if (panValues == nullptr)
1211 0 : return 0;
1212 48 : memset(panValues, 0, (nOffset + 7) / 8);
1213 48 : psValueChild->buffers[1] = panValues;
1214 :
1215 48 : nOffset = 0;
1216 138 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1217 : {
1218 90 : auto &poFeature = apoFeatures[iFeat];
1219 90 : const auto psRawField = poFeature->GetRawFieldRef(i);
1220 90 : if (IsValidField(psRawField))
1221 : {
1222 59 : const int nCount = GetFromList::getCount(psRawField);
1223 59 : const auto paList = GetFromList::getValues(psRawField);
1224 :
1225 373 : for (int j = 0; j < nCount; ++j)
1226 : {
1227 314 : if (paList[j])
1228 55 : SetBit(panValues, nOffset + j);
1229 : }
1230 59 : nOffset += static_cast<OffsetType>(nCount);
1231 : }
1232 : }
1233 :
1234 48 : return nFeatCount;
1235 : }
1236 :
1237 : /************************************************************************/
1238 : /* FillStringArray() */
1239 : /************************************************************************/
1240 :
1241 : template <class T>
1242 : static size_t
1243 3784 : FillStringArray(struct ArrowArray *psChild,
1244 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1245 : const size_t nFeatureCountLimit, const bool bIsNullable,
1246 : const int i, const size_t nMemLimit)
1247 : {
1248 3784 : psChild->n_buffers = 3;
1249 3784 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1250 3784 : uint8_t *pabyValidity = nullptr;
1251 : T *panOffsets = static_cast<T *>(
1252 3784 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1253 3784 : if (panOffsets == nullptr)
1254 0 : return 0;
1255 3784 : psChild->buffers[1] = panOffsets;
1256 :
1257 3784 : size_t nOffset = 0;
1258 3784 : size_t nFeatCount = 0;
1259 34118 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1260 : {
1261 30354 : panOffsets[iFeat] = static_cast<T>(nOffset);
1262 30354 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1263 30354 : if (IsValidField(psRawField))
1264 : {
1265 27014 : const size_t nLen = strlen(psRawField->String);
1266 27014 : if (nLen > nMemLimit - nOffset)
1267 : {
1268 20 : if (nFeatCount == 0)
1269 19 : return 0;
1270 1 : break;
1271 : }
1272 26994 : nOffset += static_cast<T>(nLen);
1273 : }
1274 3340 : else if (bIsNullable)
1275 : {
1276 3340 : ++psChild->null_count;
1277 3340 : if (pabyValidity == nullptr)
1278 : {
1279 1131 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1280 1131 : psChild->buffers[0] = pabyValidity;
1281 1131 : if (pabyValidity == nullptr)
1282 0 : return 0;
1283 : }
1284 3340 : UnsetBit(pabyValidity, iFeat);
1285 : }
1286 : }
1287 3765 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1288 :
1289 : char *pachValues =
1290 3765 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1291 3765 : if (pachValues == nullptr)
1292 0 : return 0;
1293 3765 : psChild->buffers[2] = pachValues;
1294 :
1295 3765 : nOffset = 0;
1296 34099 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1297 : {
1298 30334 : const size_t nLen =
1299 30334 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1300 30334 : if (nLen)
1301 : {
1302 25274 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1303 25274 : memcpy(pachValues + nOffset, psRawField->String, nLen);
1304 25274 : nOffset += nLen;
1305 : }
1306 : }
1307 :
1308 3765 : return nFeatCount;
1309 : }
1310 :
1311 : /************************************************************************/
1312 : /* FillStringListArray() */
1313 : /************************************************************************/
1314 :
1315 : template <class OffsetType>
1316 : static size_t
1317 203 : FillStringListArray(struct ArrowArray *psChild,
1318 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1319 : const size_t nFeatureCountLimit, const bool bIsNullable,
1320 : const int i, const size_t nMemLimit)
1321 : {
1322 203 : psChild->n_buffers = 2;
1323 203 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1324 203 : uint8_t *pabyValidity = nullptr;
1325 : OffsetType *panOffsets =
1326 203 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1327 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1328 203 : if (panOffsets == nullptr)
1329 0 : return false;
1330 203 : psChild->buffers[1] = panOffsets;
1331 :
1332 203 : OffsetType nStrings = 0;
1333 203 : OffsetType nCountChars = 0;
1334 203 : size_t nFeatCount = 0;
1335 516 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1336 : {
1337 315 : panOffsets[iFeat] = nStrings;
1338 315 : auto &poFeature = apoFeatures[iFeat];
1339 315 : const auto psRawField = poFeature->GetRawFieldRef(i);
1340 315 : if (IsValidField(psRawField))
1341 : {
1342 108 : const int nCount = psRawField->StringList.nCount;
1343 108 : if (static_cast<size_t>(nCount) >
1344 108 : static_cast<size_t>(nMemLimit - nStrings))
1345 : {
1346 0 : if (nFeatCount == 0)
1347 0 : return 0;
1348 0 : goto after_loop;
1349 : }
1350 280 : for (int j = 0; j < nCount; ++j)
1351 : {
1352 174 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1353 174 : if (nLen > static_cast<size_t>(nMemLimit - nCountChars))
1354 : {
1355 2 : if (nFeatCount == 0)
1356 1 : return 0;
1357 1 : goto after_loop;
1358 : }
1359 172 : nCountChars += static_cast<OffsetType>(nLen);
1360 : }
1361 106 : nStrings += static_cast<OffsetType>(nCount);
1362 : }
1363 207 : else if (bIsNullable)
1364 : {
1365 207 : ++psChild->null_count;
1366 207 : if (pabyValidity == nullptr)
1367 : {
1368 152 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1369 152 : psChild->buffers[0] = pabyValidity;
1370 152 : if (pabyValidity == nullptr)
1371 0 : return 0;
1372 : }
1373 207 : UnsetBit(pabyValidity, iFeat);
1374 : }
1375 : }
1376 201 : after_loop:
1377 202 : panOffsets[nFeatCount] = nStrings;
1378 :
1379 202 : psChild->n_children = 1;
1380 202 : psChild->children = static_cast<struct ArrowArray **>(
1381 202 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1382 404 : psChild->children[0] = static_cast<struct ArrowArray *>(
1383 202 : CPLCalloc(1, sizeof(struct ArrowArray)));
1384 202 : auto psValueChild = psChild->children[0];
1385 :
1386 202 : psValueChild->release = OGRLayerDefaultReleaseArray;
1387 202 : psValueChild->length = nStrings;
1388 202 : psValueChild->n_buffers = 3;
1389 202 : psValueChild->buffers =
1390 202 : static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1391 :
1392 : OffsetType *panChildOffsets = static_cast<OffsetType *>(
1393 202 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(OffsetType) * (1 + nStrings)));
1394 202 : if (panChildOffsets == nullptr)
1395 0 : return 0;
1396 202 : psValueChild->buffers[1] = panChildOffsets;
1397 :
1398 : char *pachValues =
1399 202 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars + 1));
1400 202 : if (pachValues == nullptr)
1401 0 : return 0;
1402 202 : psValueChild->buffers[2] = pachValues;
1403 :
1404 202 : nStrings = 0;
1405 202 : nCountChars = 0;
1406 515 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1407 : {
1408 313 : auto &poFeature = apoFeatures[iFeat];
1409 313 : const auto psRawField = poFeature->GetRawFieldRef(i);
1410 313 : if (IsValidField(psRawField))
1411 : {
1412 106 : const int nCount = psRawField->StringList.nCount;
1413 278 : for (int j = 0; j < nCount; ++j)
1414 : {
1415 172 : panChildOffsets[nStrings] = nCountChars;
1416 172 : ++nStrings;
1417 172 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1418 172 : memcpy(pachValues + nCountChars,
1419 172 : psRawField->StringList.paList[j], nLen);
1420 172 : nCountChars += static_cast<OffsetType>(nLen);
1421 : }
1422 : }
1423 : }
1424 202 : panChildOffsets[nStrings] = nCountChars;
1425 :
1426 202 : return nFeatCount;
1427 : }
1428 :
1429 : /************************************************************************/
1430 : /* FillBinaryArray() */
1431 : /************************************************************************/
1432 :
1433 : template <class T>
1434 : static size_t
1435 905 : FillBinaryArray(struct ArrowArray *psChild,
1436 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1437 : const size_t nFeatureCountLimit, const bool bIsNullable,
1438 : const int i, const size_t nMemLimit)
1439 : {
1440 905 : psChild->n_buffers = 3;
1441 905 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1442 905 : uint8_t *pabyValidity = nullptr;
1443 : T *panOffsets = static_cast<T *>(
1444 905 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1445 905 : if (panOffsets == nullptr)
1446 0 : return 0;
1447 905 : psChild->buffers[1] = panOffsets;
1448 :
1449 905 : T nOffset = 0;
1450 905 : size_t nFeatCount = 0;
1451 4362 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1452 : {
1453 3459 : panOffsets[iFeat] = nOffset;
1454 3459 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1455 3459 : if (IsValidField(psRawField))
1456 : {
1457 3402 : const size_t nLen = psRawField->Binary.nCount;
1458 3402 : if (nLen > static_cast<size_t>(nMemLimit - nOffset))
1459 : {
1460 2 : if (iFeat == 0)
1461 1 : return 0;
1462 1 : break;
1463 : }
1464 3400 : nOffset += static_cast<T>(nLen);
1465 : }
1466 57 : else if (bIsNullable)
1467 : {
1468 57 : ++psChild->null_count;
1469 57 : if (pabyValidity == nullptr)
1470 : {
1471 49 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1472 49 : psChild->buffers[0] = pabyValidity;
1473 49 : if (pabyValidity == nullptr)
1474 0 : return 0;
1475 : }
1476 57 : UnsetBit(pabyValidity, iFeat);
1477 : }
1478 : }
1479 904 : panOffsets[nFeatCount] = nOffset;
1480 :
1481 : GByte *pabyValues =
1482 904 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1483 904 : if (pabyValues == nullptr)
1484 0 : return 0;
1485 904 : psChild->buffers[2] = pabyValues;
1486 :
1487 904 : nOffset = 0;
1488 4361 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1489 : {
1490 3457 : const size_t nLen =
1491 3457 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1492 3457 : if (nLen)
1493 : {
1494 3400 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1495 3400 : memcpy(pabyValues + nOffset, psRawField->Binary.paData, nLen);
1496 3400 : nOffset += static_cast<T>(nLen);
1497 : }
1498 : }
1499 :
1500 904 : return nFeatCount;
1501 : }
1502 :
1503 : /************************************************************************/
1504 : /* FillFixedWidthBinaryArray() */
1505 : /************************************************************************/
1506 :
1507 : static bool
1508 8 : FillFixedWidthBinaryArray(struct ArrowArray *psChild,
1509 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1510 : const size_t nFeatureCountLimit,
1511 : const bool bIsNullable, const int nWidth, const int i)
1512 : {
1513 8 : psChild->n_buffers = 2;
1514 8 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1515 8 : uint8_t *pabyValidity = nullptr;
1516 :
1517 8 : assert(nFeatureCountLimit + 1 <=
1518 : std::numeric_limits<size_t>::max() / nWidth);
1519 : GByte *pabyValues = static_cast<GByte *>(
1520 8 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 1) * nWidth));
1521 8 : if (pabyValues == nullptr)
1522 0 : return false;
1523 8 : psChild->buffers[1] = pabyValues;
1524 :
1525 29 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1526 : {
1527 21 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1528 21 : if (IsValidField(psRawField))
1529 : {
1530 20 : const auto nLen = psRawField->Binary.nCount;
1531 20 : if (nLen < nWidth)
1532 : {
1533 0 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1534 : nLen);
1535 0 : memset(pabyValues + iFeat * nWidth + nLen, 0, nWidth - nLen);
1536 : }
1537 : else
1538 : {
1539 20 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1540 : nWidth);
1541 : }
1542 : }
1543 : else
1544 : {
1545 1 : memset(pabyValues + iFeat * nWidth, 0, nWidth);
1546 1 : if (bIsNullable)
1547 : {
1548 1 : ++psChild->null_count;
1549 1 : if (pabyValidity == nullptr)
1550 : {
1551 1 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1552 1 : psChild->buffers[0] = pabyValidity;
1553 1 : if (pabyValidity == nullptr)
1554 0 : return false;
1555 : }
1556 1 : UnsetBit(pabyValidity, iFeat);
1557 : }
1558 : }
1559 : }
1560 :
1561 8 : return true;
1562 : }
1563 :
1564 : /************************************************************************/
1565 : /* FillWKBGeometryArray() */
1566 : /************************************************************************/
1567 :
1568 : template <class T>
1569 : static size_t
1570 1227 : FillWKBGeometryArray(struct ArrowArray *psChild,
1571 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1572 : const size_t nFeatureCountLimit,
1573 : const OGRGeomFieldDefn *poFieldDefn, const int i,
1574 : const size_t nMemLimit)
1575 : {
1576 1227 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
1577 1227 : psChild->n_buffers = 3;
1578 1227 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1579 1227 : uint8_t *pabyValidity = nullptr;
1580 : T *panOffsets = static_cast<T *>(
1581 1227 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1582 1227 : if (panOffsets == nullptr)
1583 0 : return 0;
1584 1227 : psChild->buffers[1] = panOffsets;
1585 1227 : const auto eGeomType = poFieldDefn->GetType();
1586 3681 : auto poEmptyGeom =
1587 : std::unique_ptr<OGRGeometry>(OGRGeometryFactory::createGeometry(
1588 1227 : (eGeomType == wkbNone || wkbFlatten(eGeomType) == wkbUnknown)
1589 : ? wkbGeometryCollection
1590 : : eGeomType));
1591 :
1592 1227 : size_t nOffset = 0;
1593 1227 : size_t nFeatCount = 0;
1594 14269 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1595 : {
1596 13043 : panOffsets[iFeat] = static_cast<T>(nOffset);
1597 13043 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1598 13043 : if (poGeom != nullptr)
1599 : {
1600 12490 : const size_t nLen = poGeom->WkbSize();
1601 12490 : if (nLen > nMemLimit - nOffset)
1602 : {
1603 1 : if (nFeatCount == 0)
1604 0 : return 0;
1605 1 : break;
1606 : }
1607 12489 : nOffset += static_cast<T>(nLen);
1608 : }
1609 553 : else if (bIsNullable)
1610 : {
1611 553 : ++psChild->null_count;
1612 553 : if (pabyValidity == nullptr)
1613 : {
1614 271 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1615 271 : psChild->buffers[0] = pabyValidity;
1616 271 : if (pabyValidity == nullptr)
1617 0 : return 0;
1618 : }
1619 553 : UnsetBit(pabyValidity, iFeat);
1620 : }
1621 0 : else if (poEmptyGeom)
1622 : {
1623 0 : const size_t nLen = poEmptyGeom->WkbSize();
1624 0 : if (nLen > nMemLimit - nOffset)
1625 : {
1626 0 : if (nFeatCount == 0)
1627 0 : return 0;
1628 0 : break;
1629 : }
1630 0 : nOffset += static_cast<T>(nLen);
1631 : }
1632 : }
1633 1227 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1634 :
1635 : GByte *pabyValues =
1636 1227 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1637 1227 : if (pabyValues == nullptr)
1638 0 : return 0;
1639 1227 : psChild->buffers[2] = pabyValues;
1640 :
1641 1227 : nOffset = 0;
1642 14269 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1643 : {
1644 13042 : const size_t nLen =
1645 13042 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1646 13042 : if (nLen)
1647 : {
1648 12489 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1649 12489 : poGeom->exportToWkb(wkbNDR, pabyValues + nOffset, wkbVariantIso);
1650 12489 : nOffset += nLen;
1651 : }
1652 553 : else if (!bIsNullable && poEmptyGeom)
1653 : {
1654 0 : poEmptyGeom->exportToWkb(wkbNDR, pabyValues + nOffset,
1655 : wkbVariantIso);
1656 0 : nOffset += nLen;
1657 : }
1658 : }
1659 :
1660 1227 : return nFeatCount;
1661 : }
1662 :
1663 : /************************************************************************/
1664 : /* FillDateArray() */
1665 : /************************************************************************/
1666 :
1667 125 : static bool FillDateArray(struct ArrowArray *psChild,
1668 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1669 : const size_t nFeatureCountLimit,
1670 : const bool bIsNullable, const int i)
1671 : {
1672 125 : psChild->n_buffers = 2;
1673 125 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1674 125 : uint8_t *pabyValidity = nullptr;
1675 125 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1676 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1677 125 : if (panValues == nullptr)
1678 0 : return false;
1679 125 : psChild->buffers[1] = panValues;
1680 475 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1681 : {
1682 350 : auto &poFeature = apoFeatures[iFeat];
1683 350 : const auto psRawField = poFeature->GetRawFieldRef(i);
1684 350 : if (IsValidField(psRawField))
1685 : {
1686 : struct tm brokenDown;
1687 262 : memset(&brokenDown, 0, sizeof(brokenDown));
1688 262 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1689 262 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1690 262 : brokenDown.tm_mday = psRawField->Date.Day;
1691 262 : panValues[iFeat] =
1692 262 : static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
1693 : }
1694 88 : else if (bIsNullable)
1695 : {
1696 88 : panValues[iFeat] = 0;
1697 88 : ++psChild->null_count;
1698 88 : if (pabyValidity == nullptr)
1699 : {
1700 61 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1701 61 : psChild->buffers[0] = pabyValidity;
1702 61 : if (pabyValidity == nullptr)
1703 0 : return false;
1704 : }
1705 88 : UnsetBit(pabyValidity, iFeat);
1706 : }
1707 : else
1708 : {
1709 0 : panValues[iFeat] = 0;
1710 : }
1711 : }
1712 125 : return true;
1713 : }
1714 :
1715 : /************************************************************************/
1716 : /* FillTimeArray() */
1717 : /************************************************************************/
1718 :
1719 72 : static bool FillTimeArray(struct ArrowArray *psChild,
1720 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1721 : const size_t nFeatureCountLimit,
1722 : const bool bIsNullable, const int i)
1723 : {
1724 72 : psChild->n_buffers = 2;
1725 72 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1726 72 : uint8_t *pabyValidity = nullptr;
1727 72 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1728 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1729 72 : if (panValues == nullptr)
1730 0 : return false;
1731 72 : psChild->buffers[1] = panValues;
1732 667 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1733 : {
1734 595 : auto &poFeature = apoFeatures[iFeat];
1735 595 : const auto psRawField = poFeature->GetRawFieldRef(i);
1736 595 : if (IsValidField(psRawField))
1737 : {
1738 548 : panValues[iFeat] =
1739 548 : psRawField->Date.Hour * 3600000 +
1740 548 : psRawField->Date.Minute * 60000 +
1741 548 : static_cast<int>(psRawField->Date.Second * 1000 + 0.5);
1742 : }
1743 47 : else if (bIsNullable)
1744 : {
1745 47 : panValues[iFeat] = 0;
1746 47 : ++psChild->null_count;
1747 47 : if (pabyValidity == nullptr)
1748 : {
1749 39 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1750 39 : psChild->buffers[0] = pabyValidity;
1751 39 : if (pabyValidity == nullptr)
1752 0 : return false;
1753 : }
1754 47 : UnsetBit(pabyValidity, iFeat);
1755 : }
1756 : else
1757 : {
1758 0 : panValues[iFeat] = 0;
1759 : }
1760 : }
1761 72 : return true;
1762 : }
1763 :
1764 : /************************************************************************/
1765 : /* FillDateTimeArray() */
1766 : /************************************************************************/
1767 :
1768 : static bool
1769 712 : FillDateTimeArray(struct ArrowArray *psChild,
1770 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1771 : const size_t nFeatureCountLimit, const bool bIsNullable,
1772 : const int i, int nFieldTZFlag)
1773 : {
1774 712 : psChild->n_buffers = 2;
1775 712 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1776 712 : uint8_t *pabyValidity = nullptr;
1777 712 : int64_t *panValues = static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1778 : sizeof(int64_t) * (nFeatureCountLimit + 1)));
1779 712 : if (panValues == nullptr)
1780 0 : return false;
1781 712 : psChild->buffers[1] = panValues;
1782 : struct tm brokenDown;
1783 712 : memset(&brokenDown, 0, sizeof(brokenDown));
1784 3141 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1785 : {
1786 2429 : auto &poFeature = apoFeatures[iFeat];
1787 2429 : const auto psRawField = poFeature->GetRawFieldRef(i);
1788 2429 : if (IsValidField(psRawField))
1789 : {
1790 1670 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1791 1670 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1792 1670 : brokenDown.tm_mday = psRawField->Date.Day;
1793 1670 : brokenDown.tm_hour = psRawField->Date.Hour;
1794 1670 : brokenDown.tm_min = psRawField->Date.Minute;
1795 1670 : brokenDown.tm_sec = static_cast<int>(psRawField->Date.Second);
1796 : auto nVal =
1797 1670 : CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
1798 1670 : (static_cast<int>(psRawField->Date.Second * 1000 + 0.5) % 1000);
1799 1670 : if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
1800 65 : psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1801 : {
1802 : // Convert for psRawField->Date.TZFlag to UTC
1803 65 : const int TZOffset =
1804 65 : (psRawField->Date.TZFlag - OGR_TZFLAG_UTC) * 15;
1805 65 : const int TZOffsetMS = TZOffset * 60 * 1000;
1806 65 : nVal -= TZOffsetMS;
1807 : }
1808 1670 : panValues[iFeat] = nVal;
1809 : }
1810 759 : else if (bIsNullable)
1811 : {
1812 759 : panValues[iFeat] = 0;
1813 759 : ++psChild->null_count;
1814 759 : if (pabyValidity == nullptr)
1815 : {
1816 261 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1817 261 : psChild->buffers[0] = pabyValidity;
1818 261 : if (pabyValidity == nullptr)
1819 0 : return false;
1820 : }
1821 759 : UnsetBit(pabyValidity, iFeat);
1822 : }
1823 : else
1824 : {
1825 0 : panValues[iFeat] = 0;
1826 : }
1827 : }
1828 712 : return true;
1829 : }
1830 :
1831 : /************************************************************************/
1832 : /* FillDateTimeArrayAsString() */
1833 : /************************************************************************/
1834 :
1835 : static size_t
1836 8 : FillDateTimeArrayAsString(struct ArrowArray *psChild,
1837 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1838 : const size_t nFeatureCountLimit,
1839 : const bool bIsNullable, const int i,
1840 : const size_t nMemLimit)
1841 : {
1842 8 : psChild->n_buffers = 3;
1843 8 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1844 8 : uint8_t *pabyValidity = nullptr;
1845 : using T = uint32_t;
1846 : T *panOffsets = static_cast<T *>(
1847 8 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1848 8 : if (panOffsets == nullptr)
1849 0 : return 0;
1850 8 : psChild->buffers[1] = panOffsets;
1851 :
1852 8 : size_t nOffset = 0;
1853 8 : size_t nFeatCount = 0;
1854 46 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1855 : {
1856 38 : panOffsets[iFeat] = static_cast<T>(nOffset);
1857 38 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1858 38 : if (IsValidField(psRawField))
1859 : {
1860 36 : size_t nLen = strlen("YYYY-MM-DDTHH:MM:SS");
1861 36 : if (fmodf(psRawField->Date.Second, 1.0f) != 0)
1862 27 : nLen += strlen(".sss");
1863 36 : if (psRawField->Date.TZFlag == OGR_TZFLAG_UTC)
1864 7 : nLen += 1; // 'Z'
1865 29 : else if (psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1866 12 : nLen += strlen("+hh:mm");
1867 36 : if (nLen > nMemLimit - nOffset)
1868 : {
1869 0 : if (nFeatCount == 0)
1870 0 : return 0;
1871 0 : break;
1872 : }
1873 36 : nOffset += static_cast<T>(nLen);
1874 : }
1875 2 : else if (bIsNullable)
1876 : {
1877 2 : ++psChild->null_count;
1878 2 : if (pabyValidity == nullptr)
1879 : {
1880 2 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1881 2 : psChild->buffers[0] = pabyValidity;
1882 2 : if (pabyValidity == nullptr)
1883 0 : return 0;
1884 : }
1885 2 : UnsetBit(pabyValidity, iFeat);
1886 : }
1887 : }
1888 8 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1889 :
1890 : char *pachValues =
1891 8 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1892 8 : if (pachValues == nullptr)
1893 0 : return 0;
1894 8 : psChild->buffers[2] = pachValues;
1895 :
1896 8 : nOffset = 0;
1897 : char szBuffer[OGR_SIZEOF_ISO8601_DATETIME_BUFFER];
1898 : OGRISO8601Format sFormat;
1899 8 : sFormat.ePrecision = OGRISO8601Precision::AUTO;
1900 46 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1901 : {
1902 38 : const int nLen =
1903 38 : static_cast<int>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1904 38 : if (nLen)
1905 : {
1906 36 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1907 36 : int nBufSize = OGRGetISO8601DateTime(psRawField, sFormat, szBuffer);
1908 36 : if (nBufSize)
1909 : {
1910 36 : memcpy(pachValues + nOffset, szBuffer,
1911 36 : std::min(nLen, nBufSize));
1912 : }
1913 36 : if (nBufSize < nLen)
1914 : {
1915 5 : memset(pachValues + nOffset + nBufSize, 0, nLen - nBufSize);
1916 : }
1917 36 : nOffset += nLen;
1918 : }
1919 : }
1920 :
1921 8 : return nFeatCount;
1922 : }
1923 :
1924 : /************************************************************************/
1925 : /* GetNextArrowArray() */
1926 : /************************************************************************/
1927 :
1928 : /** Default implementation of the ArrowArrayStream::get_next() callback.
1929 : *
1930 : * To be used by driver implementations that have a custom GetArrowStream()
1931 : * implementation.
1932 : *
1933 : * @since GDAL 3.6
1934 : */
1935 3554 : int OGRLayer::GetNextArrowArray(struct ArrowArrayStream *stream,
1936 : struct ArrowArray *out_array)
1937 : {
1938 3554 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
1939 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
1940 : stream->private_data);
1941 :
1942 3554 : const bool bIncludeFID = CPLTestBool(
1943 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
1944 3554 : const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
1945 : GAS_OPT_DATETIME_AS_STRING, false);
1946 3554 : int nMaxBatchSize = atoi(m_aosArrowArrayStreamOptions.FetchNameValueDef(
1947 : "MAX_FEATURES_IN_BATCH", "65536"));
1948 3554 : if (nMaxBatchSize <= 0)
1949 0 : nMaxBatchSize = 1;
1950 3554 : if (nMaxBatchSize > INT_MAX - 1)
1951 0 : nMaxBatchSize = INT_MAX - 1;
1952 :
1953 : auto &oFeatureQueue =
1954 3554 : m_poSharedArrowArrayStreamPrivateData->m_oFeatureQueue;
1955 :
1956 3554 : memset(out_array, 0, sizeof(*out_array));
1957 :
1958 3554 : auto poLayerDefn = GetLayerDefn();
1959 3554 : const int nFieldCount = poLayerDefn->GetFieldCount();
1960 3554 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
1961 3554 : const int nMaxChildren =
1962 3554 : (bIncludeFID ? 1 : 0) + nFieldCount + nGeomFieldCount;
1963 3554 : int iSchemaChild = 0;
1964 :
1965 3554 : if (!m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.empty())
1966 : {
1967 6 : if (poPrivate->poShared->m_bEOF)
1968 : {
1969 2 : return 0;
1970 : }
1971 4 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS == 0)
1972 : {
1973 4 : CPLDebug("OGR", "Using fast FID filtering");
1974 : }
1975 8 : while (
1976 24 : oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize) &&
1977 12 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS <
1978 12 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
1979 : {
1980 : const auto nFID =
1981 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
1982 8 : [m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS];
1983 16 : auto poFeature = std::unique_ptr<OGRFeature>(GetFeature(nFID));
1984 8 : ++m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS;
1985 8 : if (poFeature && (m_poFilterGeom == nullptr ||
1986 0 : FilterGeometry(poFeature->GetGeomFieldRef(
1987 8 : m_iGeomFieldFilter))))
1988 : {
1989 4 : oFeatureQueue.emplace_back(std::move(poFeature));
1990 : }
1991 : }
1992 8 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS ==
1993 4 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
1994 : {
1995 4 : poPrivate->poShared->m_bEOF = true;
1996 : }
1997 : }
1998 3548 : else if (!poPrivate->poShared->m_bEOF)
1999 : {
2000 18797 : while (oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize))
2001 : {
2002 18794 : auto poFeature = std::unique_ptr<OGRFeature>(GetNextFeature());
2003 18794 : if (!poFeature)
2004 : {
2005 1829 : poPrivate->poShared->m_bEOF = true;
2006 1829 : break;
2007 : }
2008 16965 : oFeatureQueue.emplace_back(std::move(poFeature));
2009 : }
2010 : }
2011 3552 : if (oFeatureQueue.empty())
2012 : {
2013 2105 : return 0;
2014 : }
2015 :
2016 1447 : out_array->release = OGRLayerDefaultReleaseArray;
2017 1447 : out_array->null_count = 0;
2018 :
2019 1447 : out_array->n_children = nMaxChildren;
2020 1447 : out_array->children = static_cast<struct ArrowArray **>(
2021 1447 : CPLCalloc(nMaxChildren, sizeof(struct ArrowArray *)));
2022 1447 : out_array->release = OGRLayerDefaultReleaseArray;
2023 1447 : out_array->n_buffers = 1;
2024 1447 : out_array->buffers =
2025 1447 : static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
2026 :
2027 1447 : size_t nFeatureCount = oFeatureQueue.size();
2028 1447 : const uint32_t nMemLimit = OGRArrowArrayHelper::GetMemLimit();
2029 1447 : if (bIncludeFID)
2030 : {
2031 2638 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2032 1319 : CPLCalloc(1, sizeof(struct ArrowArray)));
2033 1319 : auto psChild = out_array->children[iSchemaChild];
2034 1319 : ++iSchemaChild;
2035 1319 : psChild->release = OGRLayerDefaultReleaseArray;
2036 1319 : psChild->n_buffers = 2;
2037 1319 : psChild->buffers =
2038 1319 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
2039 : int64_t *panValues =
2040 1319 : static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
2041 : sizeof(int64_t) * (oFeatureQueue.size() + 1)));
2042 1319 : if (panValues == nullptr)
2043 0 : goto error;
2044 1319 : psChild->buffers[1] = panValues;
2045 17858 : for (size_t iFeat = 0; iFeat < oFeatureQueue.size(); ++iFeat)
2046 : {
2047 16539 : panValues[iFeat] = oFeatureQueue[iFeat]->GetFID();
2048 : }
2049 : }
2050 :
2051 13708 : for (int i = 0; i < nFieldCount; ++i)
2052 : {
2053 12286 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
2054 12286 : if (poFieldDefn->IsIgnored())
2055 : {
2056 13 : continue;
2057 : }
2058 :
2059 24546 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2060 12273 : CPLCalloc(1, sizeof(struct ArrowArray)));
2061 12273 : auto psChild = out_array->children[iSchemaChild];
2062 12273 : ++iSchemaChild;
2063 12273 : psChild->release = OGRLayerDefaultReleaseArray;
2064 12273 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
2065 12273 : const auto eSubType = poFieldDefn->GetSubType();
2066 12273 : switch (poFieldDefn->GetType())
2067 : {
2068 3630 : case OFTInteger:
2069 : {
2070 3630 : if (eSubType == OFSTBoolean)
2071 : {
2072 138 : if (!FillBoolArray(psChild, oFeatureQueue, nFeatureCount,
2073 : bIsNullable, &OGRField::Integer, i))
2074 0 : goto error;
2075 : }
2076 3492 : else if (eSubType == OFSTInt16)
2077 : {
2078 478 : if (!FillArray<int16_t>(psChild, oFeatureQueue,
2079 : nFeatureCount, bIsNullable,
2080 : &OGRField::Integer, i))
2081 0 : goto error;
2082 : }
2083 : else
2084 : {
2085 3014 : if (!FillArray<int32_t>(psChild, oFeatureQueue,
2086 : nFeatureCount, bIsNullable,
2087 : &OGRField::Integer, i))
2088 0 : goto error;
2089 : }
2090 :
2091 3630 : const auto &osDomainName = poFieldDefn->GetDomainName();
2092 3630 : if (!osDomainName.empty())
2093 : {
2094 13 : auto poDS = GetDataset();
2095 13 : if (poDS)
2096 : {
2097 : const auto poFieldDomain =
2098 13 : poDS->GetFieldDomain(osDomainName);
2099 26 : if (poFieldDomain &&
2100 13 : poFieldDomain->GetDomainType() == OFDT_CODED)
2101 : {
2102 13 : const OGRCodedFieldDomain *poCodedDomain =
2103 : static_cast<const OGRCodedFieldDomain *>(
2104 : poFieldDomain);
2105 13 : OGRArrowArrayHelper::FillDict(psChild,
2106 : poCodedDomain);
2107 : }
2108 : }
2109 : }
2110 :
2111 3630 : break;
2112 : }
2113 :
2114 298 : case OFTInteger64:
2115 : {
2116 298 : if (!FillArray<int64_t>(psChild, oFeatureQueue, nFeatureCount,
2117 : bIsNullable, &OGRField::Integer64, i))
2118 0 : goto error;
2119 298 : break;
2120 : }
2121 :
2122 2063 : case OFTReal:
2123 : {
2124 2063 : if (eSubType == OFSTFloat32)
2125 : {
2126 478 : if (!FillArray<float>(psChild, oFeatureQueue, nFeatureCount,
2127 : bIsNullable, &OGRField::Real, i))
2128 0 : goto error;
2129 : }
2130 : else
2131 : {
2132 1585 : if (!FillArray<double>(psChild, oFeatureQueue,
2133 : nFeatureCount, bIsNullable,
2134 : &OGRField::Real, i))
2135 0 : goto error;
2136 : }
2137 2063 : break;
2138 : }
2139 :
2140 3784 : case OFTString:
2141 : case OFTWideString:
2142 : {
2143 3784 : const size_t nThisFeatureCount = FillStringArray<int32_t>(
2144 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2145 : nMemLimit);
2146 3784 : if (nThisFeatureCount == 0)
2147 : {
2148 19 : goto error_max_mem;
2149 : }
2150 3765 : if (nThisFeatureCount < nFeatureCount)
2151 1 : nFeatureCount = nThisFeatureCount;
2152 3765 : break;
2153 : }
2154 :
2155 913 : case OFTBinary:
2156 : {
2157 913 : const int nWidth = poFieldDefn->GetWidth();
2158 913 : if (nWidth > 0)
2159 : {
2160 8 : if (nFeatureCount > nMemLimit / nWidth)
2161 : {
2162 1 : nFeatureCount = nMemLimit / nWidth;
2163 1 : if (nFeatureCount == 0)
2164 0 : goto error_max_mem;
2165 : }
2166 8 : if (!FillFixedWidthBinaryArray(psChild, oFeatureQueue,
2167 : nFeatureCount, bIsNullable,
2168 : nWidth, i))
2169 0 : goto error;
2170 : }
2171 : else
2172 : {
2173 905 : const size_t nThisFeatureCount = FillBinaryArray<int32_t>(
2174 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2175 : nMemLimit);
2176 905 : if (nThisFeatureCount == 0)
2177 : {
2178 1 : goto error_max_mem;
2179 : }
2180 904 : if (nThisFeatureCount < nFeatureCount)
2181 1 : nFeatureCount = nThisFeatureCount;
2182 : }
2183 912 : break;
2184 : }
2185 :
2186 234 : case OFTIntegerList:
2187 : {
2188 : size_t nThisFeatureCount;
2189 234 : if (eSubType == OFSTBoolean)
2190 : {
2191 : nThisFeatureCount =
2192 49 : FillListArrayBool<int32_t, GetFromIntegerList>(
2193 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2194 : i, nMemLimit);
2195 : }
2196 185 : else if (eSubType == OFSTInt16)
2197 : {
2198 : nThisFeatureCount =
2199 28 : FillListArray<int32_t, int16_t, GetFromIntegerList>(
2200 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2201 : i, nMemLimit);
2202 : }
2203 : else
2204 : {
2205 : nThisFeatureCount =
2206 157 : FillListArray<int32_t, int32_t, GetFromIntegerList>(
2207 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2208 : i, nMemLimit);
2209 : }
2210 234 : if (nThisFeatureCount == 0)
2211 : {
2212 2 : goto error_max_mem;
2213 : }
2214 232 : if (nThisFeatureCount < nFeatureCount)
2215 2 : nFeatureCount = nThisFeatureCount;
2216 232 : break;
2217 : }
2218 :
2219 75 : case OFTInteger64List:
2220 : {
2221 : const size_t nThisFeatureCount =
2222 75 : FillListArray<int32_t, int64_t, GetFromInteger64List>(
2223 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2224 : nMemLimit);
2225 75 : if (nThisFeatureCount == 0)
2226 : {
2227 1 : goto error_max_mem;
2228 : }
2229 74 : if (nThisFeatureCount < nFeatureCount)
2230 1 : nFeatureCount = nThisFeatureCount;
2231 74 : break;
2232 : }
2233 :
2234 156 : case OFTRealList:
2235 : {
2236 : size_t nThisFeatureCount;
2237 156 : if (eSubType == OFSTFloat32)
2238 : {
2239 : nThisFeatureCount =
2240 41 : FillListArray<int32_t, float, GetFromRealList>(
2241 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2242 : i, nMemLimit);
2243 : }
2244 : else
2245 : {
2246 : nThisFeatureCount =
2247 115 : FillListArray<int32_t, double, GetFromRealList>(
2248 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2249 : i, nMemLimit);
2250 : }
2251 156 : if (nThisFeatureCount == 0)
2252 : {
2253 1 : goto error_max_mem;
2254 : }
2255 155 : if (nThisFeatureCount < nFeatureCount)
2256 1 : nFeatureCount = nThisFeatureCount;
2257 155 : break;
2258 : }
2259 :
2260 203 : case OFTStringList:
2261 : case OFTWideStringList:
2262 : {
2263 203 : const size_t nThisFeatureCount = FillStringListArray<int32_t>(
2264 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2265 : nMemLimit);
2266 203 : if (nThisFeatureCount == 0)
2267 : {
2268 1 : goto error_max_mem;
2269 : }
2270 202 : if (nThisFeatureCount < nFeatureCount)
2271 1 : nFeatureCount = nThisFeatureCount;
2272 202 : break;
2273 : }
2274 :
2275 125 : case OFTDate:
2276 : {
2277 125 : if (!FillDateArray(psChild, oFeatureQueue, nFeatureCount,
2278 : bIsNullable, i))
2279 0 : goto error;
2280 125 : break;
2281 : }
2282 :
2283 72 : case OFTTime:
2284 : {
2285 72 : if (!FillTimeArray(psChild, oFeatureQueue, nFeatureCount,
2286 : bIsNullable, i))
2287 0 : goto error;
2288 72 : break;
2289 : }
2290 :
2291 720 : case OFTDateTime:
2292 : {
2293 720 : if (bDateTimeAsString)
2294 : {
2295 8 : const size_t nThisFeatureCount = FillDateTimeArrayAsString(
2296 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2297 : nMemLimit);
2298 8 : if (nThisFeatureCount == 0)
2299 : {
2300 0 : goto error_max_mem;
2301 : }
2302 8 : if (nThisFeatureCount < nFeatureCount)
2303 0 : nFeatureCount = nThisFeatureCount;
2304 : }
2305 : else
2306 : {
2307 712 : if (!FillDateTimeArray(psChild, oFeatureQueue,
2308 : nFeatureCount, bIsNullable, i,
2309 : poFieldDefn->GetTZFlag()))
2310 0 : goto error;
2311 : }
2312 720 : break;
2313 : }
2314 : }
2315 : }
2316 2652 : for (int i = 0; i < nGeomFieldCount; ++i)
2317 : {
2318 1230 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
2319 1230 : if (poFieldDefn->IsIgnored())
2320 : {
2321 3 : continue;
2322 : }
2323 :
2324 2454 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2325 1227 : CPLCalloc(1, sizeof(struct ArrowArray)));
2326 1227 : auto psChild = out_array->children[iSchemaChild];
2327 1227 : ++iSchemaChild;
2328 1227 : psChild->release = OGRLayerDefaultReleaseArray;
2329 1227 : psChild->length = oFeatureQueue.size();
2330 1227 : const size_t nThisFeatureCount = FillWKBGeometryArray<int32_t>(
2331 : psChild, oFeatureQueue, nFeatureCount, poFieldDefn, i, nMemLimit);
2332 1227 : if (nThisFeatureCount == 0)
2333 : {
2334 0 : goto error_max_mem;
2335 : }
2336 1227 : if (nThisFeatureCount < nFeatureCount)
2337 1 : nFeatureCount = nThisFeatureCount;
2338 : }
2339 :
2340 : // Remove consumed features from the queue
2341 1422 : if (nFeatureCount == oFeatureQueue.size())
2342 1413 : oFeatureQueue.clear();
2343 : else
2344 : {
2345 27 : for (size_t i = 0; i < nFeatureCount; ++i)
2346 : {
2347 18 : oFeatureQueue.pop_front();
2348 : }
2349 : }
2350 :
2351 1422 : out_array->n_children = iSchemaChild;
2352 1422 : out_array->length = nFeatureCount;
2353 16112 : for (int i = 0; i < out_array->n_children; ++i)
2354 : {
2355 14690 : out_array->children[i]->length = nFeatureCount;
2356 : }
2357 :
2358 1422 : return 0;
2359 :
2360 25 : error_max_mem:
2361 25 : CPLError(CE_Failure, CPLE_AppDefined,
2362 : "Too large feature: not even a single feature can be returned");
2363 25 : error:
2364 25 : oFeatureQueue.clear();
2365 25 : poPrivate->poShared->m_bEOF = true;
2366 25 : out_array->release(out_array);
2367 25 : memset(out_array, 0, sizeof(*out_array));
2368 25 : return ENOMEM;
2369 : }
2370 :
2371 : /************************************************************************/
2372 : /* StaticGetNextArrowArray() */
2373 : /************************************************************************/
2374 :
2375 : /** Default implementation of the ArrowArrayStream::get_next() callback.
2376 : *
2377 : * To be used by driver implementations that have a custom GetArrowStream()
2378 : * implementation.
2379 : *
2380 : * @since GDAL 3.6
2381 : */
2382 4520 : int OGRLayer::StaticGetNextArrowArray(struct ArrowArrayStream *stream,
2383 : struct ArrowArray *out_array)
2384 : {
2385 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2386 4520 : stream->private_data)
2387 4520 : ->poShared->m_poLayer;
2388 4520 : if (poLayer == nullptr)
2389 : {
2390 1 : CPLError(CE_Failure, CPLE_NotSupported,
2391 : "Calling get_next() on a freed OGRLayer is not supported");
2392 1 : return EINVAL;
2393 : }
2394 4519 : return poLayer->GetNextArrowArray(stream, out_array);
2395 : }
2396 :
2397 : /************************************************************************/
2398 : /* ReleaseStream() */
2399 : /************************************************************************/
2400 :
2401 : /** Release a ArrowArrayStream.
2402 : *
2403 : * To be used by driver implementations that have a custom GetArrowStream()
2404 : * implementation.
2405 : *
2406 : * @param stream Arrow array stream to release.
2407 : * @since GDAL 3.6
2408 : */
2409 2222 : void OGRLayer::ReleaseStream(struct ArrowArrayStream *stream)
2410 : {
2411 2222 : assert(stream->release == OGRLayer::ReleaseStream);
2412 2222 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
2413 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2414 : stream->private_data);
2415 2222 : poPrivate->poShared->m_bArrowArrayStreamInProgress = false;
2416 2222 : poPrivate->poShared->m_bEOF = false;
2417 2222 : if (poPrivate->poShared->m_poLayer)
2418 2170 : poPrivate->poShared->m_poLayer->ResetReading();
2419 2222 : delete poPrivate;
2420 2222 : stream->private_data = nullptr;
2421 2222 : stream->release = nullptr;
2422 2222 : }
2423 :
2424 : /************************************************************************/
2425 : /* GetLastErrorArrowArrayStream() */
2426 : /************************************************************************/
2427 :
2428 : /** Default implementation of the ArrowArrayStream::get_last_error() callback.
2429 : *
2430 : * To be used by driver implementations that have a custom GetArrowStream()
2431 : * implementation.
2432 : *
2433 : * @since GDAL 3.6
2434 : */
2435 3 : const char *OGRLayer::GetLastErrorArrowArrayStream(struct ArrowArrayStream *)
2436 : {
2437 3 : const char *pszLastErrorMsg = CPLGetLastErrorMsg();
2438 3 : return pszLastErrorMsg[0] != '\0' ? pszLastErrorMsg : nullptr;
2439 : }
2440 :
2441 : /************************************************************************/
2442 : /* GetArrowStream() */
2443 : /************************************************************************/
2444 :
2445 : /** Get a Arrow C stream.
2446 : *
2447 : * On successful return, and when the stream interfaces is no longer needed, it
2448 : * must must be freed with out_stream->release(out_stream). Please carefully
2449 : * read https://arrow.apache.org/docs/format/CStreamInterface.html for more
2450 : * details on using Arrow C stream.
2451 : *
2452 : * The method may take into account ignored fields set with SetIgnoredFields()
2453 : * (the default implementation does), and should take into account filters set
2454 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2455 : * specialized implementations may fallback to the default (slower)
2456 : * implementation when filters are set.
2457 : * Drivers that have a specialized implementation should advertise the
2458 : * OLCFastGetArrowStream capability.
2459 : *
2460 : * There are extra precautions to take into account in a OGR context. Unless
2461 : * otherwise specified by a particular driver implementation, the get_schema(),
2462 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2463 : * structure should no longer be used after the OGRLayer, from which the
2464 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2465 : * dataset closing). The reason is that those function pointers will typically
2466 : * point to methods of the OGRLayer instance.
2467 : * However, the ArrowSchema and ArrowArray structures filled from those
2468 : * callbacks can be used and must be released independently from the
2469 : * ArrowArrayStream or the layer.
2470 : *
2471 : * Furthermore, unless otherwise specified by a particular driver
2472 : * implementation, only one ArrowArrayStream can be active at a time on
2473 : * a given layer (that is the last active one must be explicitly released before
2474 : * a next one is asked). Changing filter state, ignored columns, modifying the
2475 : * schema or using ResetReading()/GetNextFeature() while using a
2476 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2477 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2478 : * should be called on a layer, while an ArrowArrayStream on it is active.
2479 : *
2480 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2481 : * get_schema() callback may be set with the potential following items:
2482 : * <ul>
2483 : * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2484 : * Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2485 : * specified.</li>
2486 : * <li>"GDAL:OGR:alternative_name": value of
2487 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2488 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2489 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2490 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2491 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2492 : * string)</li>
2493 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2494 : * "true" or "false")</li>
2495 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2496 : * </ul>
2497 : *
2498 : * A potential usage can be:
2499 : \code{.cpp}
2500 : struct ArrowArrayStream stream;
2501 : if( !poLayer->GetArrowStream(&stream, nullptr))
2502 : {
2503 : CPLError(CE_Failure, CPLE_AppDefined, "GetArrowStream() failed\n");
2504 : exit(1);
2505 : }
2506 : struct ArrowSchema schema;
2507 : if( stream.get_schema(&stream, &schema) == 0 )
2508 : {
2509 : // Do something useful
2510 : schema.release(schema);
2511 : }
2512 : while( true )
2513 : {
2514 : struct ArrowArray array;
2515 : // Look for an error (get_next() returning a non-zero code), or
2516 : // end of iteration (array.release == nullptr)
2517 : if( stream.get_next(&stream, &array) != 0 ||
2518 : array.release == nullptr )
2519 : {
2520 : break;
2521 : }
2522 : // Do something useful
2523 : array.release(&array);
2524 : }
2525 : stream.release(&stream);
2526 : \endcode
2527 : *
2528 : * A full example is available in the
2529 : * <a
2530 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2531 : From OGR using the Arrow C Stream data interface</a> tutorial.
2532 : *
2533 : * Options may be driver specific. The default implementation recognizes the
2534 : * following options:
2535 : * <ul>
2536 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to YES.
2537 : * </li>
2538 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2539 : * a ArrowArray batch. Defaults to 65 536.</li>
2540 : * <li>TIMEZONE="unknown", "UTC", "(+|:)HH:MM" or any other value supported by
2541 : * Arrow. (GDAL >= 3.8)
2542 : * Override the timezone flag nominally provided by
2543 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2544 : * declaration, with a user specified timezone.
2545 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2546 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2547 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2548 : * to UTC of a OGRField::Date is only done if both the timezone indicated by
2549 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2550 : * this TIMEZONE option) are not unknown.</li>
2551 : * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2552 : * Whether DateTime fields should be returned as a (normally ISO-8601
2553 : * formatted) string by drivers. The aim is to be able to handle mixed
2554 : * timezones (or timezone naive values) in the same column.
2555 : * All drivers must honour that option, and potentially fallback to the
2556 : * OGRLayer generic implementation if they cannot (which is the case for the
2557 : * Arrow, Parquet and ADBC drivers).
2558 : * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2559 : * </li>
2560 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2561 : * The default is OGC, which will lead to setting
2562 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2563 : * If setting to GEOMETRY_METADATA_ENCODING to GEOARROW,
2564 : * ARROW:extension:name=geoarrow.wkb and
2565 : * ARROW:extension:metadata={"crs": <projjson CRS representation>> are set.
2566 : * </li>
2567 : * </ul>
2568 : *
2569 : * The Arrow/Parquet drivers recognize the following option:
2570 : * <ul>
2571 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2572 : * when the native geometry encoding is not WKB. Otherwise the geometry
2573 : * will be returned with its native Arrow encoding
2574 : * (possibly using GeoArrow encoding).</li>
2575 : * </ul>
2576 : *
2577 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2578 : * structure does not need to be initialized.
2579 : * @param papszOptions NULL terminated list of key=value options.
2580 : * @return true in case of success.
2581 : * @since GDAL 3.6
2582 : */
2583 2226 : bool OGRLayer::GetArrowStream(struct ArrowArrayStream *out_stream,
2584 : CSLConstList papszOptions)
2585 : {
2586 2226 : memset(out_stream, 0, sizeof(*out_stream));
2587 3775 : if (m_poSharedArrowArrayStreamPrivateData &&
2588 3775 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress)
2589 : {
2590 4 : CPLError(CE_Failure, CPLE_AppDefined,
2591 : "An arrow Arrow Stream is in progress on that layer. Only "
2592 : "one at a time is allowed in this implementation.");
2593 4 : return false;
2594 : }
2595 2222 : m_aosArrowArrayStreamOptions.Assign(CSLDuplicate(papszOptions), true);
2596 :
2597 2222 : out_stream->get_schema = OGRLayer::StaticGetArrowSchema;
2598 2222 : out_stream->get_next = OGRLayer::StaticGetNextArrowArray;
2599 2222 : out_stream->get_last_error = OGRLayer::GetLastErrorArrowArrayStream;
2600 2222 : out_stream->release = OGRLayer::ReleaseStream;
2601 :
2602 2222 : if (m_poSharedArrowArrayStreamPrivateData == nullptr)
2603 : {
2604 : m_poSharedArrowArrayStreamPrivateData =
2605 677 : std::make_shared<ArrowArrayStreamPrivateData>();
2606 677 : m_poSharedArrowArrayStreamPrivateData->m_poLayer = this;
2607 : }
2608 2222 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress = true;
2609 :
2610 : // Special case for "FID = constant", or "FID IN (constant1, ...., constantN)"
2611 2222 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.clear();
2612 2222 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS = 0;
2613 2222 : if (m_poAttrQuery)
2614 : {
2615 : swq_expr_node *poNode =
2616 1054 : static_cast<swq_expr_node *>(m_poAttrQuery->GetSWQExpr());
2617 3162 : if (poNode->eNodeType == SNT_OPERATION &&
2618 1054 : (poNode->nOperation == SWQ_IN || poNode->nOperation == SWQ_EQ) &&
2619 825 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
2620 286 : poNode->papoSubExpr[0]->field_index ==
2621 2117 : GetLayerDefn()->GetFieldCount() + SPF_FID &&
2622 9 : TestCapability(OLCRandomRead))
2623 : {
2624 8 : std::set<GIntBig> oSetAlreadyListed;
2625 13 : for (int i = 1; i < poNode->nSubExprCount; ++i)
2626 : {
2627 27 : if (poNode->papoSubExpr[i]->eNodeType == SNT_CONSTANT &&
2628 18 : poNode->papoSubExpr[i]->field_type == SWQ_INTEGER64 &&
2629 9 : oSetAlreadyListed.find(poNode->papoSubExpr[i]->int_value) ==
2630 18 : oSetAlreadyListed.end())
2631 : {
2632 8 : oSetAlreadyListed.insert(poNode->papoSubExpr[i]->int_value);
2633 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
2634 8 : .push_back(poNode->papoSubExpr[i]->int_value);
2635 : }
2636 : }
2637 : }
2638 : }
2639 :
2640 2222 : auto poPrivateData = new ArrowArrayStreamPrivateDataSharedDataWrapper();
2641 2222 : poPrivateData->poShared = m_poSharedArrowArrayStreamPrivateData;
2642 2222 : out_stream->private_data = poPrivateData;
2643 2222 : return true;
2644 : }
2645 :
2646 : /************************************************************************/
2647 : /* OGR_L_GetArrowStream() */
2648 : /************************************************************************/
2649 :
2650 : /** Get a Arrow C stream.
2651 : *
2652 : * On successful return, and when the stream interfaces is no longer needed, it
2653 : * must be freed with out_stream->release(out_stream). Please carefully read
2654 : * https://arrow.apache.org/docs/format/CStreamInterface.html for more details
2655 : * on using Arrow C stream.
2656 : *
2657 : * The method may take into account ignored fields set with SetIgnoredFields()
2658 : * (the default implementation does), and should take into account filters set
2659 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2660 : * specialized implementations may fallback to the default (slower)
2661 : * implementation when filters are set.
2662 : * Drivers that have a specialized implementation should
2663 : * advertise the OLCFastGetArrowStream capability.
2664 : *
2665 : * There are extra precautions to take into account in an OGR context. Unless
2666 : * otherwise specified by a particular driver implementation, the get_schema(),
2667 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2668 : * structure should no longer be used after the OGRLayer, from which the
2669 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2670 : * dataset closing). The reason is that those function pointers will typically
2671 : * point to methods of the OGRLayer instance.
2672 : * However, the ArrowSchema and ArrowArray structures filled from those
2673 : * callbacks can be used and must be released independently from the
2674 : * ArrowArrayStream or the layer.
2675 : *
2676 : * Furthermore, unless otherwise specified by a particular driver
2677 : * implementation, only one ArrowArrayStream can be active at a time on
2678 : * a given layer (that is, the last active one must be explicitly released
2679 : * before the next one is requested). Changing filter state or ignored columns,
2680 : * modifying the schema or using ResetReading()/GetNextFeature() while using an
2681 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2682 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2683 : * should be called on a layer, while an ArrowArrayStream on it is active.
2684 : *
2685 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2686 : * get_schema() callback may be set with the potential following items:
2687 : * <ul>
2688 : * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2689 : * Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2690 : * specified.</li>
2691 : * <li>"GDAL:OGR:alternative_name": value of
2692 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2693 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2694 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2695 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2696 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2697 : * string)</li>
2698 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2699 : * "true" or "false")</li>
2700 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2701 : * </ul>
2702 : *
2703 : * A potential usage can be:
2704 : \code{.cpp}
2705 : struct ArrowArrayStream stream;
2706 : if( !OGR_L_GetArrowStream(hLayer, &stream, nullptr))
2707 : {
2708 : CPLError(CE_Failure, CPLE_AppDefined,
2709 : "OGR_L_GetArrowStream() failed\n");
2710 : exit(1);
2711 : }
2712 : struct ArrowSchema schema;
2713 : if( stream.get_schema(&stream, &schema) == 0 )
2714 : {
2715 : // Do something useful
2716 : schema.release(&schema);
2717 : }
2718 : while( true )
2719 : {
2720 : struct ArrowArray array;
2721 : // Look for an error (get_next() returning a non-zero code), or
2722 : // end of iteration (array.release == nullptr)
2723 : if( stream.get_next(&stream, &array) != 0 ||
2724 : array.release == nullptr )
2725 : {
2726 : break;
2727 : }
2728 : // Do something useful
2729 : array.release(&array);
2730 : }
2731 : stream.release(&stream);
2732 : \endcode
2733 : *
2734 : * A full example is available in the
2735 : * <a
2736 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2737 : From OGR using the Arrow C Stream data interface</a> tutorial.
2738 : *
2739 : * Options may be driver specific. The default implementation recognizes the
2740 : * following options:
2741 : * <ul>
2742 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to
2743 : YES.</li>
2744 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2745 : * an ArrowArray batch. Defaults to 65 536.</li>
2746 : * <li>TIMEZONE="unknown", "UTC", "(+|:)HH:MM" or any other value supported by
2747 : * Arrow. (GDAL >= 3.8)
2748 : * Override the timezone flag nominally provided by
2749 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2750 : * declaration, with a user specified timezone.
2751 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2752 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2753 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2754 : * to UTC of an OGRField::Date is only done if both the timezone indicated by
2755 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2756 : * this TIMEZONE option) are not unknown.</li>
2757 : * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2758 : * Whether DateTime fields should be returned as a (normally ISO-8601
2759 : * formatted) string by drivers. The aim is to be able to handle mixed
2760 : * timezones (or timezone naive values) in the same column.
2761 : * All drivers must honour that option, and potentially fallback to the
2762 : * OGRLayer generic implementation if they cannot (which is the case for the
2763 : * Arrow, Parquet and ADBC drivers).
2764 : * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2765 : * </li>
2766 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2767 : * The default is OGC, which will lead to setting
2768 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2769 : * If setting GEOMETRY_METADATA_ENCODING to GEOARROW,
2770 : * ARROW:extension:name=geoarrow.wkb and
2771 : * ARROW:extension:metadata={"crs": <projjson CRS representation>} are set.
2772 : * </li>
2773 : * </ul>
2774 : *
2775 : * The Arrow/Parquet drivers recognize the following option:
2776 : * <ul>
2777 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2778 : * when the native geometry encoding is not WKB. Otherwise the geometry
2779 : * will be returned with its native Arrow encoding
2780 : * (possibly using GeoArrow encoding).</li>
2781 : * </ul>
2782 : *
2783 : * @param hLayer Layer
2784 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2785 : * structure does not need to be initialized.
2786 : * @param papszOptions NULL terminated list of key=value options.
2787 : * @return true in case of success.
2788 : * @since GDAL 3.6
2789 : */
// C API wrapper around OGRLayer::GetArrowStream(): checks both pointer
// arguments, then forwards out_stream and papszOptions to the layer object
// obtained from hLayer.
2790 373 : bool OGR_L_GetArrowStream(OGRLayerH hLayer, struct ArrowArrayStream *out_stream,
2791 : char **papszOptions)
2792 : {
// NOTE(review): VALIDATE_POINTER1 is defined elsewhere; presumably it reports
// an error and returns its third argument (false) on a NULL pointer - confirm.
2793 373 : VALIDATE_POINTER1(hLayer, "OGR_L_GetArrowStream", false);
2794 373 : VALIDATE_POINTER1(out_stream, "OGR_L_GetArrowStream", false);
2795 :
// The result of the C++ method is returned unchanged.
2796 746 : return OGRLayer::FromHandle(hLayer)->GetArrowStream(out_stream,
2797 373 : papszOptions);
2798 : }
2799 :
2800 : /************************************************************************/
2801 : /* OGRParseArrowMetadata() */
2802 : /************************************************************************/
2803 :
// Decodes a binary key/value metadata blob into a std::map.
//
// Layout read from pabyMetadata, in order:
//   - int32: number of key/value pairs
//   - for each pair: int32 key length, key bytes, int32 value length,
//     value bytes
// Integers are read with memcpy(), i.e. in the host byte order and without
// any alignment requirement on pabyMetadata.
//
// pabyMetadata must not be NULL (it is dereferenced unconditionally).
// If the same key appears several times, the last value wins.
//
// NOTE(review): the counts and lengths are used as-is; there is no check for
// negative values or for reading past the end of the buffer, so the blob is
// assumed to be well-formed - confirm that callers only pass trusted data.
2804 : std::map<std::string, std::string>
2805 191 : OGRParseArrowMetadata(const char *pabyMetadata)
2806 : {
2807 191 : std::map<std::string, std::string> oMetadata;
2808 : int32_t nKVP;
// Number of key/value pairs.
2809 191 : memcpy(&nKVP, pabyMetadata, sizeof(int32_t));
2810 191 : pabyMetadata += sizeof(int32_t);
2811 391 : for (int i = 0; i < nKVP; ++i)
2812 : {
// Length-prefixed key (not NUL-terminated in the blob).
2813 : int32_t nSizeKey;
2814 200 : memcpy(&nSizeKey, pabyMetadata, sizeof(int32_t));
2815 200 : pabyMetadata += sizeof(int32_t);
2816 400 : std::string osKey;
2817 200 : osKey.assign(pabyMetadata, nSizeKey);
2818 200 : pabyMetadata += nSizeKey;
2819 :
// Length-prefixed value.
2820 : int32_t nSizeValue;
2821 200 : memcpy(&nSizeValue, pabyMetadata, sizeof(int32_t));
2822 200 : pabyMetadata += sizeof(int32_t);
2823 400 : std::string osValue;
2824 200 : osValue.assign(pabyMetadata, nSizeValue);
2825 200 : pabyMetadata += nSizeValue;
2826 :
2827 200 : oMetadata[osKey] = std::move(osValue);
2828 : }
2829 :
2830 382 : return oMetadata;
2831 : }
2832 :
2833 : /************************************************************************/
2834 : /* ParseDecimalFormat() */
2835 : /************************************************************************/
2836 :
// Parses a decimal format string of the form "d:precision,scale" or
// "d:precision,scale,bitwidth".
//
// format        Format string; parsing starts 2 characters in, so the caller
//               must pass a string of at least that length (the "d:" prefix
//               itself is not checked here).
// nPrecision    [out] Precision, parsed with atoi() (0 if not parseable).
// nScale        [out] Scale, parsed with atoi() (0 if not parseable).
// nWidthInBytes [out] Storage width in bytes: 16 when no bit width is given,
//               bitwidth / 8 otherwise, and 0 on failure.
//
// Returns false when there is no comma after the precision, or when the
// explicit bit width is not a multiple of 8; true otherwise.
2837 686 : static bool ParseDecimalFormat(const char *format, int &nPrecision, int &nScale,
2838 : int &nWidthInBytes)
2839 : {
2840 : // d:19,10 ==> decimal128 [precision 19, scale 10]
2841 : // d:19,10,NNN ==> decimal bitwidth = NNN [precision 19, scale 10]
2842 686 : nPrecision = 0;
2843 686 : nScale = 0;
2844 686 : nWidthInBytes = 128 / 8; // 128 bit
2845 686 : const char *pszFirstComma = strchr(format + 2, ',');
2846 686 : if (pszFirstComma)
2847 : {
// atoi() stops at the first non-digit, so the comma ends the precision.
2848 686 : nPrecision = atoi(format + 2);
2849 686 : nScale = atoi(pszFirstComma + 1);
2850 686 : const char *pszSecondComma = strchr(pszFirstComma + 1, ',');
2851 686 : if (pszSecondComma)
2852 : {
2853 274 : const int nWidthInBits = atoi(pszSecondComma + 1);
2854 274 : if ((nWidthInBits % 8) != 0)
2855 : {
2856 : // shouldn't happen for well-formed schemas
2857 0 : nWidthInBytes = 0;
2858 0 : return false;
2859 : }
2860 : else
2861 : {
2862 274 : nWidthInBytes = nWidthInBits / 8;
2863 : }
2864 : }
2865 : }
2866 : else
2867 : {
2868 : // shouldn't happen for well-formed schemas
2869 0 : nWidthInBytes = 0;
2870 0 : return false;
2871 : }
2872 686 : return true;
2873 : }
2874 :
2875 : /************************************************************************/
2876 : /* GetErrorIfUnsupportedDecimal() */
2877 : /************************************************************************/
2878 :
// Checks whether a decimal field with the given storage width and precision
// is supported.
//
// nWidthInBytes Storage width in bytes; only 16 (128 bit) and 32 (256 bit)
//               are accepted.
// nPrecision    Decimal precision; must be in the range [1, 19].
//
// Returns nullptr when the combination is supported, otherwise a pointer to
// a string literal describing the problem (not to be freed).
2879 55 : static const char *GetErrorIfUnsupportedDecimal(int nWidthInBytes,
2880 : int nPrecision)
2881 : {
2882 :
2883 55 : if (nWidthInBytes != 128 / 8 && nWidthInBytes != 256 / 8)
2884 : {
2885 0 : return "For decimal field, only width 128 and 256 are supported";
2886 : }
2887 :
2888 : // precision=19 fits on 64 bits
2889 55 : if (nPrecision <= 0 || nPrecision > 19)
2890 : {
2891 0 : return "For decimal field, only precision up to 19 is supported";
2892 : }
2893 :
2894 55 : return nullptr;
2895 : }
2896 :
2897 : /************************************************************************/
2898 : /* IsHandledSchema() */
2899 : /************************************************************************/
2900 :
// Recursively checks whether every field of a schema has a format that this
// file knows how to handle.
//
// bTopLevel     true for the root schema. When recursing into the children
//               of a structure, the root passes an empty prefix while nested
//               structures pass osPrefix + schema->name + ".".
// schema        Schema (or sub-schema) to inspect.
// osPrefix      Dotted prefix prepended to schema->name to build the full
//               field name looked up in aosUsedFields.
// bHasAttrQuery Whether an attribute filter is set. Only consulted for
//               decimal fields.
// aosUsedFields Field names referenced by the attribute filter.
//
// Returns true when the schema is handled. On failure a CPLDebug() message
// is emitted and false is returned.
2901 15760 : static bool IsHandledSchema(bool bTopLevel, const struct ArrowSchema *schema,
2902 : const std::string &osPrefix, bool bHasAttrQuery,
2903 : const CPLStringList &aosUsedFields)
2904 : {
2905 15760 : const char *format = schema->format;
// Structures: handled if and only if all their children are handled.
2906 15760 : if (IsStructure(format))
2907 : {
2908 12285 : for (int64_t i = 0; i < schema->n_children; ++i)
2909 : {
2910 44156 : if (!IsHandledSchema(/* bTopLevel = */ false,
2911 11039 : schema->children[static_cast<size_t>(i)],
2912 24772 : bTopLevel ? std::string()
2913 13733 : : osPrefix + schema->name + ".",
2914 : bHasAttrQuery, aosUsedFields))
2915 : {
2916 0 : return false;
2917 : }
2918 : }
2919 1246 : return true;
2920 : }
2921 :
2922 : // Lists or maps
// Only the first child is inspected, and it is accessed without checking
// schema->n_children.
2923 25169 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format) ||
2924 10655 : IsMap(format))
2925 : {
2926 4566 : if (!IsHandledSchema(/* bTopLevel = */ false, schema->children[0],
2927 : osPrefix, bHasAttrQuery, aosUsedFields))
2928 : {
2929 0 : return false;
2930 : }
2931 : // For now, we can't filter on lists or maps
// Note that this lookup is done regardless of bHasAttrQuery.
2932 4566 : if (aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
2933 : {
2934 0 : CPLDebug("OGR",
2935 : "Field %s has unhandled format '%s' for an "
2936 : "attribute to filter on",
2937 0 : (osPrefix + schema->name).c_str(), format);
2938 0 : return false;
2939 : }
2940 4566 : return true;
2941 : }
2942 :
// Formats that are accepted on an exact string match.
2943 9948 : const char *const apszHandledFormats[] = {
2944 : "b", // boolean
2945 : "c", // int8
2946 : "C", // uint8
2947 : "s", // int16
2948 : "S", // uint16
2949 : "i", // int32
2950 : "I", // uint32
2951 : "l", // int64
2952 : "L", // uint64
2953 : "e", // float16
2954 : "f", // float32
2955 : "g", // float64,
2956 : "z", // binary
2957 : "Z", // large binary
2958 : "u", // UTF-8 string
2959 : "U", // large UTF-8 string
2960 : "tdD", // date32[days]
2961 : "tdm", // date64[milliseconds]
2962 : "tts", //time32 [seconds]
2963 : "ttm", //time32 [milliseconds]
2964 : "ttu", //time64 [microseconds]
2965 : "ttn", //time64 [nanoseconds]
2966 : };
2967 :
2968 115231 : for (const char *pszHandledFormat : apszHandledFormats)
2969 : {
2970 113923 : if (strcmp(format, pszHandledFormat) == 0)
2971 : {
2972 8640 : return true;
2973 : }
2974 : }
2975 :
// Decimals are always accepted, except when the attribute filter references
// the field: in that case the format must parse and its width/precision
// must be supported.
2976 1308 : if (IsDecimal(format))
2977 : {
2978 790 : if (bHasAttrQuery &&
2979 790 : aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
2980 : {
2981 2 : int nPrecision = 0;
2982 2 : int nScale = 0;
2983 2 : int nWidthInBytes = 0;
2984 2 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
2985 : {
2986 0 : CPLDebug("OGR", "%s",
2987 0 : (std::string("Invalid field format ") + format +
2988 0 : " for field " + osPrefix + schema->name)
2989 : .c_str());
2990 0 : return false;
2991 : }
2992 :
2993 : const char *pszError =
2994 2 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
2995 2 : if (pszError)
2996 : {
2997 0 : CPLDebug("OGR", "%s", pszError);
2998 0 : return false;
2999 : }
3000 : }
3001 412 : return true;
3002 : }
3003 :
// Parameterized formats recognized by helper predicates rather than by an
// exact string match.
3004 896 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
3005 : {
3006 896 : return true;
3007 : }
3008 :
// Anything else is not handled.
3009 0 : CPLDebug("OGR", "Field %s has unhandled format '%s'",
3010 0 : (osPrefix + schema->name).c_str(), format);
3011 0 : return false;
3012 : }
3013 :
3014 : /************************************************************************/
3015 : /* OGRLayer::CanPostFilterArrowArray() */
3016 : /************************************************************************/
3017 :
3018 : /** Whether the PostFilterArrowArray() can work on the schema to remove
3019 : * rows that aren't selected by the spatial or attribute filter.
3020 : *
// Two checks are performed:
//  - all fields of the schema must be accepted by IsHandledSchema(), taking
//    into account the fields used by the attribute filter (if any);
//  - if a spatial filter is set, the top-level child of the schema whose name
//    is the one of the filtered geometry field must exist, be of binary or
//    large binary format, and carry an ARROW_EXTENSION_NAME_KEY metadata item
//    equal to EXTENSION_NAME_OGC_WKB or EXTENSION_NAME_GEOARROW_WKB.
// Each failure is reported through CPLDebug().
//
// @param schema Schema of the arrays to post-filter.
// @return true if post-filtering is possible with that schema.
3020 : */
3021 155 : bool OGRLayer::CanPostFilterArrowArray(const struct ArrowSchema *schema) const
3022 : {
3023 155 : if (!IsHandledSchema(
3024 155 : /* bTopLevel=*/true, schema, std::string(),
3025 155 : m_poAttrQuery != nullptr,
3026 310 : m_poAttrQuery ? CPLStringList(m_poAttrQuery->GetUsedFields())
3027 : : CPLStringList()))
3028 : {
3029 0 : return false;
3030 : }
3031 :
3032 155 : if (m_poFilterGeom)
3033 : {
3034 22 : bool bFound = false;
// const_cast needed because GetLayerDefn() is called on a const object here.
3035 : const char *pszGeomFieldName =
3036 : const_cast<OGRLayer *>(this)
3037 22 : ->GetLayerDefn()
3038 22 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
3039 22 : ->GetNameRef();
// Look for the geometry column among the top-level children only.
3040 839 : for (int64_t i = 0; i < schema->n_children; ++i)
3041 : {
3042 839 : const auto fieldSchema = schema->children[i];
3043 839 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
3044 : {
3045 23 : if (!IsBinary(fieldSchema->format) &&
3046 1 : !IsLargeBinary(fieldSchema->format))
3047 : {
// NOTE(review): the message below says "handled format" although this branch
// rejects the format; it was probably meant to read "unhandled".
3048 1 : CPLDebug("OGR", "Geometry field %s has handled format '%s'",
3049 : fieldSchema->name, fieldSchema->format);
3050 1 : return false;
3051 : }
3052 :
3053 : // Check if ARROW:extension:name = ogc.wkb
3054 21 : const char *pabyMetadata = fieldSchema->metadata;
3055 21 : if (!pabyMetadata)
3056 : {
3057 0 : CPLDebug(
3058 : "OGR",
3059 : "Geometry field %s lacks metadata in its schema field",
3060 : fieldSchema->name);
3061 0 : return false;
3062 : }
3063 :
3064 21 : const auto oMetadata = OGRParseArrowMetadata(pabyMetadata);
3065 21 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
3066 21 : if (oIter == oMetadata.end())
3067 : {
3068 0 : CPLDebug("OGR",
3069 : "Geometry field %s lacks "
3070 : "%s metadata "
3071 : "in its schema field",
3072 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY);
3073 0 : return false;
3074 : }
// Both the OGC and the GeoArrow WKB extension names are accepted.
3075 21 : if (oIter->second != EXTENSION_NAME_OGC_WKB &&
3076 0 : oIter->second != EXTENSION_NAME_GEOARROW_WKB)
3077 : {
3078 0 : CPLDebug("OGR",
3079 : "Geometry field %s has unexpected "
3080 : "%s = '%s' metadata "
3081 : "in its schema field",
3082 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY,
3083 0 : oIter->second.c_str());
3084 0 : return false;
3085 : }
3086 :
3087 21 : bFound = true;
3088 21 : break;
3089 : }
3090 : }
3091 21 : if (!bFound)
3092 : {
3093 0 : CPLDebug("OGR", "Cannot find geometry field %s in schema",
3094 : pszGeomFieldName);
3095 0 : return false;
3096 : }
3097 : }
3098 :
3099 154 : return true;
3100 : }
3101 :
// Debugging helper, currently compiled out by the "#if 0" below.
// It recounts the unset bits of the validity bitmap (buffers[0]) over
// [offset, offset + length) and asserts that the result matches
// array->null_count. Arrays with an unknown null count (< 0) are skipped, and
// arrays without a validity buffer are expected to have null_count == 0.
3102 : #if 0
3103 : /************************************************************************/
3104 : /* CheckValidityBuffer() */
3105 : /************************************************************************/
3106 :
3107 : static void CheckValidityBuffer(const struct ArrowArray *array)
3108 : {
3109 : if (array->null_count < 0)
3110 : return;
3111 : const uint8_t *pabyValidity =
3112 : static_cast<const uint8_t *>(const_cast<const void *>(array->buffers[0]));
3113 : if( !pabyValidity )
3114 : {
3115 : CPLAssert(array->null_count == 0);
3116 : return;
3117 : }
3118 : size_t null_count = 0;
3119 : const size_t nOffset = static_cast<size_t>(array->offset);
3120 : for(size_t i = 0; i < static_cast<size_t>(array->length); ++i )
3121 : {
3122 : if (!TestBit(pabyValidity, i + nOffset))
3123 : ++ null_count;
3124 : }
3125 : CPLAssert(static_cast<size_t>(array->null_count) == null_count);
3126 : }
3127 : #endif
3128 :
3129 : /************************************************************************/
3130 : /* CompactValidityBuffer() */
3131 : /************************************************************************/
3132 :
// Compacts, in place, the validity bitmap (buffers[0]) of an array so that
// only the bits of the rows kept by the filters remain, stored contiguously
// from row iStart onwards.
//
// (unnamed)              Schema of the array; unused.
// array                  Array whose validity bitmap is rewritten. Its
//                        null_count is always reset to -1 (unknown).
// iStart                 Index (not counting array->offset) of the row that
//                        corresponds to abyValidityFromFilters[0].
// abyValidityFromFilters One entry per source row starting at iStart; true
//                        means the row is kept.
// nNewLength             Length of the array after compaction; no bit at or
//                        beyond nNewLength + array->offset is written.
//
// array->length is not modified here; callers update it themselves.
3133 7610 : static void CompactValidityBuffer(
3134 : const struct ArrowSchema *, struct ArrowArray *array, size_t iStart,
3135 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3136 : {
3137 : // Invalidate null_count as the same validity buffer may be used when
3138 : // scrolling batches, and this creates confusion if we try to set it
3139 : // to different values among the batches
// NOTE(review): a null_count of -1 (already unknown) also takes this early
// return, so the bitmap is left uncompacted in that case - confirm that this
// is intended.
3140 7610 : if (array->null_count <= 0)
3141 : {
3142 4186 : array->null_count = -1;
3143 4186 : return;
3144 : }
3145 3424 : array->null_count = -1;
3146 :
3147 3424 : CPLAssert(static_cast<size_t>(array->length) >=
3148 : iStart + abyValidityFromFilters.size());
3149 3424 : uint8_t *pabyValidity =
3150 3424 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[0]));
3151 3424 : const size_t nLength = abyValidityFromFilters.size();
3152 3424 : const size_t nOffset = static_cast<size_t>(array->offset);
// j is the absolute bit index of the next destination slot; the source bit
// of row i is at i + iStart + nOffset, which is never smaller than j.
3153 3424 : size_t j = iStart + nOffset;
3154 12563 : for (size_t i = 0; i < nLength && j < nNewLength + nOffset; ++i)
3155 : {
3156 9139 : if (abyValidityFromFilters[i])
3157 : {
3158 5663 : if (TestBit(pabyValidity, i + iStart + nOffset))
3159 4307 : SetBit(pabyValidity, j);
3160 : else
3161 1356 : UnsetBit(pabyValidity, j);
3162 5663 : ++j;
3163 : }
3164 : }
3165 : }
3166 :
3167 : /************************************************************************/
3168 : /* CompactBoolArray() */
3169 : /************************************************************************/
3170 :
// Compacts, in place, a boolean array whose values are stored as a bitmap in
// buffers[1]: the bits of the rows kept by the filters are moved so that they
// are contiguous from row iStart onwards.
//
// schema                 Schema of the array; its ARROW_FLAG_NULLABLE flag
//                        decides whether the validity bitmap is compacted.
// array                  Array to modify. Must have no children and 2 buffers.
// iStart                 Index (not counting array->offset) of the row that
//                        corresponds to abyValidityFromFilters[0].
// abyValidityFromFilters One entry per source row starting at iStart; true
//                        means the row is kept.
// nNewLength             Value assigned to array->length on return.
3171 224 : static void CompactBoolArray(const struct ArrowSchema *schema,
3172 : struct ArrowArray *array, size_t iStart,
3173 : const std::vector<bool> &abyValidityFromFilters,
3174 : size_t nNewLength)
3175 : {
3176 224 : CPLAssert(array->n_children == 0);
3177 224 : CPLAssert(array->n_buffers == 2);
3178 224 : CPLAssert(static_cast<size_t>(array->length) >=
3179 : iStart + abyValidityFromFilters.size());
3180 :
3181 224 : const size_t nLength = abyValidityFromFilters.size();
3182 224 : const size_t nOffset = static_cast<size_t>(array->offset);
3183 224 : uint8_t *pabyData =
3184 224 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[1]));
// Bit indices are absolute here: array->offset is added to both the source
// and the destination index rather than to the base pointer.
3185 224 : size_t j = iStart + nOffset;
3186 1147 : for (size_t i = 0; i < nLength; ++i)
3187 : {
3188 923 : if (abyValidityFromFilters[i])
3189 : {
3190 424 : if (TestBit(pabyData, i + iStart + nOffset))
3191 199 : SetBit(pabyData, j);
3192 : else
3193 225 : UnsetBit(pabyData, j);
3194 :
3195 424 : ++j;
3196 : }
3197 : }
3198 :
3199 224 : if (schema->flags & ARROW_FLAG_NULLABLE)
3200 224 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3201 : nNewLength);
3202 :
3203 224 : array->length = nNewLength;
3204 224 : }
3205 :
3206 : /************************************************************************/
3207 : /* CompactPrimitiveArray() */
3208 : /************************************************************************/
3209 :
// Compacts, in place, an array of fixed-size values of type T stored in
// buffers[1]: the values of the rows kept by the filters are moved so that
// they are contiguous from row iStart onwards.
//
// T                      In-memory type of one value.
// schema                 Schema of the array; its ARROW_FLAG_NULLABLE flag
//                        decides whether the validity bitmap is compacted.
// array                  Array to modify. Must have no children and 2 buffers.
// iStart                 Index (not counting array->offset) of the row that
//                        corresponds to abyValidityFromFilters[0].
// abyValidityFromFilters One entry per source row starting at iStart; true
//                        means the row is kept.
// nNewLength             Value assigned to array->length on return.
3210 : template <class T>
3211 3503 : static void CompactPrimitiveArray(
3212 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3213 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3214 : {
3215 3503 : CPLAssert(array->n_children == 0);
3216 3503 : CPLAssert(array->n_buffers == 2);
3217 3503 : CPLAssert(static_cast<size_t>(array->length) >=
3218 : iStart + abyValidityFromFilters.size());
3219 :
3220 3503 : const size_t nLength = abyValidityFromFilters.size();
3221 3503 : const size_t nOffset = static_cast<size_t>(array->offset);
// The base pointer already includes array->offset, so the indices below are
// relative to the logical start of the array.
3222 3503 : T *paData =
3223 3503 : static_cast<T *>(const_cast<void *>(array->buffers[1])) + nOffset;
// j (destination) never exceeds i + iStart (source), so a value is never
// overwritten before it has been read.
3224 3503 : size_t j = iStart;
3225 17682 : for (size_t i = 0; i < nLength; ++i)
3226 : {
3227 14179 : if (abyValidityFromFilters[i])
3228 : {
3229 6206 : paData[j] = paData[i + iStart];
3230 6206 : ++j;
3231 : }
3232 : }
3233 :
3234 3503 : if (schema->flags & ARROW_FLAG_NULLABLE)
3235 3492 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3236 : nNewLength);
3237 :
3238 3503 : array->length = nNewLength;
3239 3503 : }
3240 :
3241 : /************************************************************************/
3242 : /* CompactStringOrBinaryArray() */
3243 : /************************************************************************/
3244 :
// Compacts, in place, a variable-length array made of an offsets buffer
// (buffers[1]) and a data buffer (buffers[2]): the offsets and the bytes of
// the rows kept by the filters are moved so that they are contiguous from row
// iStart onwards.
//
// OffsetType             Integer type of the entries of the offsets buffer.
// schema                 Schema of the array; its ARROW_FLAG_NULLABLE flag
//                        decides whether the validity bitmap is compacted.
// array                  Array to modify. Must have no children and 3 buffers.
// iStart                 Index (not counting array->offset) of the row that
//                        corresponds to abyValidityFromFilters[0].
// abyValidityFromFilters One entry per source row starting at iStart; true
//                        means the row is kept.
// nNewLength             Value assigned to array->length on return.
3245 : template <class OffsetType>
3246 1187 : static void CompactStringOrBinaryArray(
3247 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3248 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3249 : {
3250 1187 : CPLAssert(array->n_children == 0);
3251 1187 : CPLAssert(array->n_buffers == 3);
3252 1187 : CPLAssert(static_cast<size_t>(array->length) >=
3253 : iStart + abyValidityFromFilters.size());
3254 :
3255 1187 : const size_t nLength = abyValidityFromFilters.size();
3256 1187 : const size_t nOffset = static_cast<size_t>(array->offset);
// The offsets pointer already includes array->offset.
3257 1187 : OffsetType *panOffsets =
3258 1187 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3259 : nOffset;
3260 1187 : GByte *pabyData =
3261 1187 : static_cast<GByte *>(const_cast<void *>(array->buffers[2]));
3262 1187 : size_t j = iStart;
// nCurOffset is the write position in the data buffer; it starts where the
// data of row iStart starts.
3263 1187 : OffsetType nCurOffset = panOffsets[iStart];
3264 5103 : for (size_t i = 0; i < nLength; ++i)
3265 : {
3266 3916 : if (abyValidityFromFilters[i])
3267 : {
// Both source offsets are read before panOffsets[j] is written, since j may
// be equal to i + iStart.
3268 1768 : const auto nStartOffset = panOffsets[i + iStart];
3269 1768 : const auto nEndOffset = panOffsets[i + iStart + 1];
3270 1768 : panOffsets[j] = nCurOffset;
3271 1768 : const auto nSize = static_cast<size_t>(nEndOffset - nStartOffset);
3272 1768 : if (nSize)
3273 : {
// Only move bytes when the row actually shifts; memmove() is used because
// the source and destination ranges may overlap.
3274 1562 : if (nCurOffset < nStartOffset)
3275 : {
3276 636 : memmove(pabyData + nCurOffset, pabyData + nStartOffset,
3277 : nSize);
3278 : }
3279 1562 : nCurOffset += static_cast<OffsetType>(nSize);
3280 : }
3281 1768 : ++j;
3282 : }
3283 : }
// Closing offset: end of the data of the last kept row.
3284 1187 : panOffsets[j] = nCurOffset;
3285 :
3286 1187 : if (schema->flags & ARROW_FLAG_NULLABLE)
3287 806 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3288 : nNewLength);
3289 :
3290 1187 : array->length = nNewLength;
3291 1187 : }
3292 :
3293 : /************************************************************************/
3294 : /* CompactFixedWidthArray() */
3295 : /************************************************************************/
3296 :
// Compacts, in place, an array whose values are nWidth bytes each, stored in
// buffers[1]: the values of the rows kept by the filters are moved so that
// they are contiguous from row iStart onwards.
//
// schema                 Schema of the array; its ARROW_FLAG_NULLABLE flag
//                        decides whether the validity bitmap is compacted.
// array                  Array to modify. Must have no children and 2 buffers.
// nWidth                 Size in bytes of one value.
// iStart                 Index (not counting array->offset) of the row that
//                        corresponds to abyValidityFromFilters[0].
// abyValidityFromFilters One entry per source row starting at iStart; true
//                        means the row is kept.
// nNewLength             Value assigned to array->length on return.
3297 : static void
3298 305 : CompactFixedWidthArray(const struct ArrowSchema *schema,
3299 : struct ArrowArray *array, int nWidth, size_t iStart,
3300 : const std::vector<bool> &abyValidityFromFilters,
3301 : size_t nNewLength)
3302 : {
3303 305 : CPLAssert(array->n_children == 0);
3304 305 : CPLAssert(array->n_buffers == 2);
3305 305 : CPLAssert(static_cast<size_t>(array->length) >=
3306 : iStart + abyValidityFromFilters.size());
3307 :
3308 305 : const size_t nLength = abyValidityFromFilters.size();
3309 305 : const size_t nOffset = static_cast<size_t>(array->offset);
3310 305 : GByte *pabyData =
3311 305 : static_cast<GByte *>(const_cast<void *>(array->buffers[1]));
// nStartOffset is the byte offset of the current source row, nCurOffset the
// byte offset of the next destination slot. Both start at the same value and
// only advance by nWidth, so when they differ they are at least nWidth bytes
// apart and the memcpy() ranges below cannot overlap.
3312 305 : size_t nStartOffset = (iStart + nOffset) * nWidth;
3313 305 : size_t nCurOffset = nStartOffset;
3314 1133 : for (size_t i = 0; i < nLength; ++i, nStartOffset += nWidth)
3315 : {
3316 828 : if (abyValidityFromFilters[i])
3317 : {
3318 391 : if (nCurOffset < nStartOffset)
3319 : {
3320 210 : memcpy(pabyData + nCurOffset, pabyData + nStartOffset, nWidth);
3321 : }
3322 391 : nCurOffset += nWidth;
3323 : }
3324 : }
3325 :
3326 305 : if (schema->flags & ARROW_FLAG_NULLABLE)
3327 305 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3328 : nNewLength);
3329 :
3330 305 : array->length = nNewLength;
3331 305 : }
3332 :
3333 : /************************************************************************/
3334 : /* CompactStructArray() */
3335 : /************************************************************************/
3336 :
// Forward declaration: CompactArray() is defined further down in the file
// and is called on the children of nested arrays.
3337 : static bool CompactArray(const struct ArrowSchema *schema,
3338 : struct ArrowArray *array, size_t iStart,
3339 : const std::vector<bool> &abyValidityFromFilters,
3340 : size_t nNewLength);
3341 :
// Compacts, in place, a structure array by compacting each of its child
// arrays with CompactArray(), then its own validity bitmap if the schema is
// nullable.
//
// schema                 Schema of the structure.
// array                  Structure array to modify.
// iStart                 Index (not counting array->offset) of the row that
//                        corresponds to abyValidityFromFilters[0].
// abyValidityFromFilters One entry per source row starting at iStart; true
//                        means the row is kept.
// nNewLength             Value assigned to array->length on return.
//
// Returns false as soon as the compaction of a child fails, true otherwise.
3342 665 : static bool CompactStructArray(const struct ArrowSchema *schema,
3343 : struct ArrowArray *array, size_t iStart,
3344 : const std::vector<bool> &abyValidityFromFilters,
3345 : size_t nNewLength)
3346 : {
3347 : // The equality might not be strict when some sub-arrays
3348 : // are fully void !
3349 665 : CPLAssert(array->n_children <= schema->n_children);
3350 6646 : for (int64_t iField = 0; iField < array->n_children; ++iField)
3351 : {
3352 5981 : const auto psChildSchema = schema->children[iField];
3353 5981 : const auto psChildArray = array->children[iField];
3354 : // To please Arrow validation...
// The new child length includes the offset of the parent structure.
3355 5981 : const size_t nChildNewLength =
3356 5981 : static_cast<size_t>(array->offset) + nNewLength;
3357 5981 : if (psChildArray->length > array->length)
3358 : {
// The child is longer than its parent: extend the filter with "false"
// entries so that the extra child rows are treated as dropped.
3359 120 : std::vector<bool> abyChildValidity(abyValidityFromFilters);
3360 120 : abyChildValidity.resize(
3361 120 : abyValidityFromFilters.size() +
3362 120 : static_cast<size_t>(psChildArray->length - array->length),
3363 : false);
3364 120 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3365 : abyChildValidity, nChildNewLength))
3366 : {
3367 0 : return false;
3368 : }
3369 : }
3370 : else
3371 : {
3372 5861 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3373 : abyValidityFromFilters, nChildNewLength))
3374 : {
3375 0 : return false;
3376 : }
3377 : }
3378 5981 : CPLAssert(psChildArray->length ==
3379 : static_cast<int64_t>(nChildNewLength));
3380 : }
3381 :
3382 665 : if (schema->flags & ARROW_FLAG_NULLABLE)
3383 201 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3384 : nNewLength);
3385 :
3386 665 : array->length = nNewLength;
3387 :
3388 665 : return true;
3389 : }
3390 :
3391 : /************************************************************************/
3392 : /* InvalidateNullCountRec() */
3393 : /************************************************************************/
3394 :
// Recursively sets null_count to -1 (unknown) on an array and all its
// descendants, for every level whose schema has the ARROW_FLAG_NULLABLE flag.
// The schema and array trees are walked in parallel, using the number of
// children of the array.
3395 570 : static void InvalidateNullCountRec(const struct ArrowSchema *schema,
3396 : struct ArrowArray *array)
3397 : {
3398 570 : if (schema->flags & ARROW_FLAG_NULLABLE)
3399 210 : array->null_count = -1;
3400 960 : for (int i = 0; i < array->n_children; ++i)
3401 390 : InvalidateNullCountRec(schema->children[i], array->children[i]);
3402 570 : }
3403 :
3404 : /************************************************************************/
3405 : /* CompactListArray() */
3406 : /************************************************************************/
3407 :
// Compacts, in place, a variable-size list array: the offsets buffer
// (buffers[1]) is rewritten for the rows kept by the filters, and the single
// child array is compacted with CompactArray() so that the elements of the
// dropped rows are removed.
//
// OffsetType             Integer type of the entries of the offsets buffer.
// schema                 Schema of the list; schema->children[0] describes
//                        the elements.
// array                  List array to modify. Must have 1 child and
//                        2 buffers.
// iStart                 Index (not counting array->offset) of the row that
//                        corresponds to abyValidityFromFilters[0].
// abyValidityFromFilters One entry per source row starting at iStart; true
//                        means the row is kept.
// nNewLength             Value assigned to array->length on return.
//
// Returns false if inconsistent offsets are detected (an error is emitted
// with CPLError()) or if the compaction of the child array fails.
3408 : template <class OffsetType>
3409 1773 : static bool CompactListArray(const struct ArrowSchema *schema,
3410 : struct ArrowArray *array, size_t iStart,
3411 : const std::vector<bool> &abyValidityFromFilters,
3412 : size_t nNewLength)
3413 : {
3414 1773 : CPLAssert(static_cast<size_t>(array->length) >=
3415 : iStart + abyValidityFromFilters.size());
3416 1773 : CPLAssert(array->n_children == 1);
3417 1773 : CPLAssert(array->n_buffers == 2);
3418 :
3419 1773 : const auto psChildSchema = schema->children[0];
3420 1773 : const auto psChildArray = array->children[0];
3421 :
3422 1773 : const size_t nLength = abyValidityFromFilters.size();
3423 1773 : const size_t nOffset = static_cast<size_t>(array->offset);
// The offsets pointer already includes array->offset.
3424 1773 : OffsetType *panOffsets =
3425 1773 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3426 : nOffset;
3427 :
// Only touch the child array if the considered rows reference at least one
// child element.
3428 1773 : if (panOffsets[iStart + nLength] > panOffsets[iStart])
3429 : {
// One entry per child element referenced by rows [iStart, iStart + nLength),
// indexed relatively to panOffsets[iStart]. All are kept by default.
3430 3186 : std::vector<bool> abyChildValidity(
3431 1593 : static_cast<size_t>(panOffsets[iStart + nLength] -
3432 1593 : panOffsets[iStart]),
3433 : true);
3434 1593 : size_t j = iStart;
3435 1593 : OffsetType nCurOffset = panOffsets[iStart];
3436 6694 : for (size_t i = 0; i < nLength; ++i)
3437 : {
3438 5101 : if (abyValidityFromFilters[i])
3439 : {
// Kept row: its size is read before its new start offset is stored at
// index j, which may be equal to i + iStart.
3440 2142 : const auto nSize =
3441 2142 : panOffsets[i + iStart + 1] - panOffsets[i + iStart];
3442 2142 : panOffsets[j] = nCurOffset;
3443 2142 : nCurOffset += nSize;
3444 2142 : ++j;
3445 : }
3446 : else
3447 : {
// Dropped row: flag its child elements as dropped too.
3448 2959 : const auto nStartOffset = panOffsets[i + iStart];
3449 2959 : const auto nEndOffset = panOffsets[i + iStart + 1];
3450 2959 : if (nStartOffset != nEndOffset)
3451 : {
3452 3073 : if (nStartOffset >=
3453 1538 : panOffsets[iStart] + abyChildValidity.size())
3454 : {
3455 : // shouldn't happen in sane arrays...
3456 0 : CPLError(CE_Failure, CPLE_AppDefined,
3457 : "nStartOffset >= panOffsets[iStart] + "
3458 : "abyChildValidity.size()");
3459 0 : return false;
3460 : }
3461 : // nEndOffset might be equal to abyChildValidity.size()
3462 3073 : if (nEndOffset >
3463 1538 : panOffsets[iStart] + abyChildValidity.size())
3464 : {
3465 : // shouldn't happen in sane arrays...
3466 0 : CPLError(CE_Failure, CPLE_AppDefined,
3467 : "nEndOffset > panOffsets[iStart] + "
3468 : "abyChildValidity.size()");
3469 0 : return false;
3470 : }
3471 1538 : for (auto k = nStartOffset - panOffsets[iStart];
3472 4652 : k < nEndOffset - panOffsets[iStart]; ++k)
3473 3114 : abyChildValidity[static_cast<size_t>(k)] = false;
3474 : }
3475 : }
3476 : }
// Closing offset of the last kept row; it is also the new length of the
// child array.
3477 1593 : panOffsets[j] = nCurOffset;
3478 1593 : const size_t nChildNewLength = static_cast<size_t>(panOffsets[j]);
3479 : // To please Arrow validation
3480 4552 : for (; j < iStart + nLength; ++j)
3481 2959 : panOffsets[j] = nCurOffset;
3482 :
// panOffsets[iStart] still holds its initial value at this point: the only
// value written at that index above is the initial nCurOffset, which was
// read from it.
3483 1593 : if (!CompactArray(psChildSchema, psChildArray,
3484 1593 : static_cast<size_t>(panOffsets[iStart]),
3485 : abyChildValidity, nChildNewLength))
3486 0 : return false;
3487 :
3488 1593 : CPLAssert(psChildArray->length ==
3489 : static_cast<int64_t>(nChildNewLength));
3490 : }
3491 : else
3492 : {
// No child element is referenced: the child is left untouched, apart from
// flagging its null counts as unknown.
3493 180 : InvalidateNullCountRec(psChildSchema, psChildArray);
3494 : }
3495 :
3496 1773 : if (schema->flags & ARROW_FLAG_NULLABLE)
3497 1773 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3498 : nNewLength);
3499 :
3500 1773 : array->length = nNewLength;
3501 :
3502 1773 : return true;
3503 : }
3504 :
3505 : /************************************************************************/
3506 : /* CompactFixedSizeListArray() */
3507 : /************************************************************************/
3508 :
// Compacts, in place, a fixed-size list array where each row owns N
// consecutive elements of the single child array: the child array is
// compacted with CompactArray() so that the elements of the dropped rows are
// removed, then the validity bitmap of the list itself is compacted if the
// schema is nullable.
//
// schema                 Schema of the list; schema->children[0] describes
//                        the elements.
// array                  List array to modify. Must have 1 child.
// N                      Number of child elements per row.
// iStart                 Index (not counting array->offset) of the row that
//                        corresponds to abyValidityFromFilters[0].
// abyValidityFromFilters One entry per source row starting at iStart; true
//                        means the row is kept.
// nNewLength             Value assigned to array->length on return.
//
// Returns false if the compaction of the child array fails, true otherwise.
3509 : static bool
3510 809 : CompactFixedSizeListArray(const struct ArrowSchema *schema,
3511 : struct ArrowArray *array, size_t N, size_t iStart,
3512 : const std::vector<bool> &abyValidityFromFilters,
3513 : size_t nNewLength)
3514 : {
3515 809 : CPLAssert(static_cast<size_t>(array->length) >=
3516 : iStart + abyValidityFromFilters.size());
3517 809 : CPLAssert(array->n_children == 1);
3518 :
3519 809 : const auto psChildSchema = schema->children[0];
3520 809 : const auto psChildArray = array->children[0];
3521 :
3522 809 : const size_t nLength = abyValidityFromFilters.size();
3523 809 : const size_t nOffset = static_cast<size_t>(array->offset);
// N entries per source row, all kept by default.
3524 1618 : std::vector<bool> abyChildValidity(N * nLength, true);
// The new child length starts from the number of child elements located
// before row iStart (array->offset included) and grows by N per kept row.
3525 809 : size_t nChildNewLength = (iStart + nOffset) * N;
// nSrcLength counts the kept rows; it is only used by the assertion below.
3526 809 : size_t nSrcLength = 0;
3527 3198 : for (size_t i = 0; i < nLength; ++i)
3528 : {
3529 2389 : if (abyValidityFromFilters[i])
3530 : {
3531 1015 : nChildNewLength += N;
3532 1015 : nSrcLength++;
3533 : }
3534 : else
3535 : {
// Dropped row: flag its N child elements as dropped.
3536 1374 : const size_t nStartOffset = i * N;
3537 1374 : const size_t nEndOffset = (i + 1) * N;
3538 4122 : for (size_t k = nStartOffset; k < nEndOffset; ++k)
3539 2748 : abyChildValidity[k] = false;
3540 : }
3541 : }
3542 809 : CPL_IGNORE_RET_VAL(nSrcLength);
3543 809 : CPLAssert(iStart + nSrcLength == nNewLength);
3544 :
// The child is compacted starting at the first element of row iStart.
3545 809 : if (!CompactArray(psChildSchema, psChildArray, (iStart + nOffset) * N,
3546 : abyChildValidity, nChildNewLength))
3547 0 : return false;
3548 :
3549 809 : if (schema->flags & ARROW_FLAG_NULLABLE)
3550 809 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3551 : nNewLength);
3552 :
3553 809 : array->length = nNewLength;
3554 :
3555 809 : CPLAssert(psChildArray->length >=
3556 : static_cast<int64_t>(N) * (array->length + array->offset));
3557 :
3558 809 : return true;
3559 : }
3560 :
3561 : /************************************************************************/
3562 : /* CompactMapArray() */
3563 : /************************************************************************/
3564 :
3565 561 : static bool CompactMapArray(const struct ArrowSchema *schema,
3566 : struct ArrowArray *array, size_t iStart,
3567 : const std::vector<bool> &abyValidityFromFilters,
3568 : size_t nNewLength)
3569 : {
3570 561 : return CompactListArray<uint32_t>(schema, array, iStart,
3571 561 : abyValidityFromFilters, nNewLength);
3572 : }
3573 :
3574 : /************************************************************************/
3575 : /* CompactArray() */
3576 : /************************************************************************/
3577 :
3578 8383 : static bool CompactArray(const struct ArrowSchema *schema,
3579 : struct ArrowArray *array, size_t iStart,
3580 : const std::vector<bool> &abyValidityFromFilters,
3581 : size_t nNewLength)
3582 : {
3583 8383 : const char *format = schema->format;
3584 :
3585 8383 : if (IsStructure(format))
3586 : {
3587 582 : if (!CompactStructArray(schema, array, iStart, abyValidityFromFilters,
3588 : nNewLength))
3589 0 : return false;
3590 : }
3591 7801 : else if (IsList(format))
3592 : {
3593 1209 : if (!CompactListArray<uint32_t>(schema, array, iStart,
3594 : abyValidityFromFilters, nNewLength))
3595 0 : return false;
3596 : }
3597 6592 : else if (IsLargeList(format))
3598 : {
3599 3 : if (!CompactListArray<uint64_t>(schema, array, iStart,
3600 : abyValidityFromFilters, nNewLength))
3601 0 : return false;
3602 : }
3603 6589 : else if (IsMap(format))
3604 : {
3605 561 : if (!CompactMapArray(schema, array, iStart, abyValidityFromFilters,
3606 : nNewLength))
3607 0 : return false;
3608 : }
3609 6028 : else if (IsFixedSizeList(format))
3610 : {
3611 809 : const int N = GetFixedSizeList(format);
3612 809 : if (N <= 0)
3613 0 : return false;
3614 809 : if (!CompactFixedSizeListArray(schema, array, static_cast<size_t>(N),
3615 : iStart, abyValidityFromFilters,
3616 : nNewLength))
3617 0 : return false;
3618 : }
3619 5219 : else if (IsBoolean(format))
3620 : {
3621 224 : CompactBoolArray(schema, array, iStart, abyValidityFromFilters,
3622 : nNewLength);
3623 : }
3624 4995 : else if (IsInt8(format) || IsUInt8(format))
3625 : {
3626 444 : CompactPrimitiveArray<uint8_t>(schema, array, iStart,
3627 : abyValidityFromFilters, nNewLength);
3628 : }
3629 4551 : else if (IsInt16(format) || IsUInt16(format) || IsFloat16(format))
3630 : {
3631 458 : CompactPrimitiveArray<uint16_t>(schema, array, iStart,
3632 : abyValidityFromFilters, nNewLength);
3633 : }
3634 7893 : else if (IsInt32(format) || IsUInt32(format) || IsFloat32(format) ||
3635 11323 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
3636 3430 : strcmp(format, "ttm") == 0)
3637 : {
3638 794 : CompactPrimitiveArray<uint32_t>(schema, array, iStart,
3639 : abyValidityFromFilters, nNewLength);
3640 : }
3641 5879 : else if (IsInt64(format) || IsUInt64(format) || IsFloat64(format) ||
3642 1997 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
3643 5879 : strcmp(format, "ttn") == 0 || strncmp(format, "ts", 2) == 0)
3644 : {
3645 1807 : CompactPrimitiveArray<uint64_t>(schema, array, iStart,
3646 : abyValidityFromFilters, nNewLength);
3647 : }
3648 1492 : else if (IsString(format) || IsBinary(format))
3649 : {
3650 983 : CompactStringOrBinaryArray<uint32_t>(
3651 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3652 : }
3653 509 : else if (IsLargeString(format) || IsLargeBinary(format))
3654 : {
3655 204 : CompactStringOrBinaryArray<uint64_t>(
3656 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3657 : }
3658 305 : else if (IsFixedWidthBinary(format))
3659 : {
3660 67 : const int nWidth = GetFixedWithBinary(format);
3661 67 : CompactFixedWidthArray(schema, array, nWidth, iStart,
3662 : abyValidityFromFilters, nNewLength);
3663 : }
3664 238 : else if (IsDecimal(format))
3665 : {
3666 238 : int nPrecision = 0;
3667 238 : int nScale = 0;
3668 238 : int nWidthInBytes = 0;
3669 238 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
3670 : {
3671 0 : CPLError(CE_Failure, CPLE_AppDefined,
3672 : "Unexpected error in PostFilterArrowArray(): unhandled "
3673 : "field format: %s",
3674 : format);
3675 :
3676 0 : return false;
3677 : }
3678 238 : CompactFixedWidthArray(schema, array, nWidthInBytes, iStart,
3679 : abyValidityFromFilters, nNewLength);
3680 : }
3681 : else
3682 : {
3683 0 : CPLError(CE_Failure, CPLE_AppDefined,
3684 : "Unexpected error in CompactArray(): unhandled "
3685 : "field format: %s",
3686 : format);
3687 0 : return false;
3688 : }
3689 :
3690 8383 : return true;
3691 : }
3692 :
3693 : /************************************************************************/
3694 : /* FillValidityArrayFromWKBArray() */
3695 : /************************************************************************/
3696 :
3697 : template <class OffsetType>
3698 : static size_t
3699 21 : FillValidityArrayFromWKBArray(struct ArrowArray *array, const OGRLayer *poLayer,
3700 : std::vector<bool> &abyValidityFromFilters)
3701 : {
3702 21 : const size_t nLength = static_cast<size_t>(array->length);
3703 14 : const uint8_t *pabyValidity =
3704 21 : array->null_count == 0
3705 : ? nullptr
3706 7 : : static_cast<const uint8_t *>(array->buffers[0]);
3707 21 : const size_t nOffset = static_cast<size_t>(array->offset);
3708 21 : const OffsetType *panOffsets =
3709 21 : static_cast<const OffsetType *>(array->buffers[1]) + nOffset;
3710 21 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
3711 21 : OGREnvelope sEnvelope;
3712 21 : abyValidityFromFilters.resize(nLength);
3713 21 : size_t nCountIntersecting = 0;
3714 138 : for (size_t i = 0; i < nLength; ++i)
3715 : {
3716 117 : if (!pabyValidity || TestBit(pabyValidity, i + nOffset))
3717 : {
3718 110 : const GByte *pabyWKB = pabyData + panOffsets[i];
3719 110 : const size_t nWKBSize =
3720 110 : static_cast<size_t>(panOffsets[i + 1] - panOffsets[i]);
3721 110 : if (poLayer->FilterWKBGeometry(pabyWKB, nWKBSize,
3722 : /* bEnvelopeAlreadySet=*/false,
3723 : sEnvelope))
3724 : {
3725 29 : abyValidityFromFilters[i] = true;
3726 29 : nCountIntersecting++;
3727 : }
3728 : }
3729 : }
3730 21 : return nCountIntersecting;
3731 : }
3732 :
3733 : /************************************************************************/
3734 : /* ArrowTimestampToOGRDateTime() */
3735 : /************************************************************************/
3736 :
3737 107 : static void ArrowTimestampToOGRDateTime(int64_t nTimestamp,
3738 : int nInvFactorToSecond,
3739 : const char *pszTZ, OGRFeature &oFeature,
3740 : int iField)
3741 : {
3742 107 : double floatingPart = 0;
3743 107 : if (nInvFactorToSecond)
3744 : {
3745 107 : floatingPart =
3746 107 : (nTimestamp % nInvFactorToSecond) / double(nInvFactorToSecond);
3747 107 : nTimestamp /= nInvFactorToSecond;
3748 : }
3749 107 : int nTZFlag = 0;
3750 107 : const size_t nTZLen = strlen(pszTZ);
3751 107 : if ((nTZLen == 3 && strcmp(pszTZ, "UTC") == 0) ||
3752 0 : (nTZLen == 7 && strcmp(pszTZ, "Etc/UTC") == 0))
3753 : {
3754 17 : nTZFlag = 100;
3755 : }
3756 90 : else if (nTZLen == 6 && (pszTZ[0] == '+' || pszTZ[0] == '-') &&
3757 33 : pszTZ[3] == ':')
3758 : {
3759 33 : int nTZHour = atoi(pszTZ + 1);
3760 33 : int nTZMin = atoi(pszTZ + 4);
3761 33 : if (nTZHour >= 0 && nTZHour <= 14 && nTZMin >= 0 && nTZMin < 60 &&
3762 33 : (nTZMin % 15) == 0)
3763 : {
3764 33 : nTZFlag = (nTZHour * 4) + (nTZMin / 15);
3765 33 : if (pszTZ[0] == '+')
3766 : {
3767 24 : nTZFlag = 100 + nTZFlag;
3768 24 : nTimestamp += nTZHour * 3600 + nTZMin * 60;
3769 : }
3770 : else
3771 : {
3772 9 : nTZFlag = 100 - nTZFlag;
3773 9 : nTimestamp -= nTZHour * 3600 + nTZMin * 60;
3774 : }
3775 : }
3776 : }
3777 : struct tm dt;
3778 107 : CPLUnixTimeToYMDHMS(nTimestamp, &dt);
3779 107 : oFeature.SetField(iField, dt.tm_year + 1900, dt.tm_mon + 1, dt.tm_mday,
3780 : dt.tm_hour, dt.tm_min,
3781 107 : static_cast<float>(dt.tm_sec + floatingPart), nTZFlag);
3782 107 : }
3783 :
3784 : /************************************************************************/
3785 : /* BuildMapFieldNameToArrowPath() */
3786 : /************************************************************************/
3787 :
3788 : static void
3789 334 : BuildMapFieldNameToArrowPath(const struct ArrowSchema *schema,
3790 : std::map<std::string, std::vector<int>> &oMap,
3791 : const std::string &osPrefix,
3792 : std::vector<int> &anArrowPath)
3793 : {
3794 7833 : for (int64_t i = 0; i < schema->n_children; ++i)
3795 : {
3796 7499 : auto psChild = schema->children[i];
3797 7499 : anArrowPath.push_back(static_cast<int>(i));
3798 7499 : if (IsStructure(psChild->format))
3799 : {
3800 400 : std::string osNewPrefix(osPrefix);
3801 200 : osNewPrefix += psChild->name;
3802 200 : osNewPrefix += ".";
3803 200 : BuildMapFieldNameToArrowPath(psChild, oMap, osNewPrefix,
3804 : anArrowPath);
3805 : }
3806 : else
3807 : {
3808 7299 : oMap[osPrefix + psChild->name] = anArrowPath;
3809 : }
3810 7499 : anArrowPath.pop_back();
3811 : }
3812 334 : }
3813 :
3814 : /************************************************************************/
3815 : /* FillFieldList() */
3816 : /************************************************************************/
3817 :
3818 : template <typename ListOffsetType, typename ArrowType,
3819 : typename OGRType = ArrowType>
3820 167 : inline static void FillFieldList(const struct ArrowArray *array,
3821 : int iOGRFieldIdx, size_t nOffsettedIndex,
3822 : const struct ArrowArray *childArray,
3823 : OGRFeature &oFeature)
3824 : {
3825 167 : const auto panOffsets =
3826 167 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3827 : nOffsettedIndex;
3828 334 : std::vector<OGRType> aValues;
3829 167 : const auto *paValues =
3830 167 : static_cast<const ArrowType *>(childArray->buffers[1]);
3831 167 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3832 509 : i < static_cast<size_t>(panOffsets[1]); ++i)
3833 : {
3834 342 : aValues.push_back(static_cast<OGRType>(paValues[i]));
3835 : }
3836 167 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3837 : aValues.data());
3838 167 : }
3839 :
3840 : /************************************************************************/
3841 : /* FillFieldListFromBool() */
3842 : /************************************************************************/
3843 :
3844 : template <typename ListOffsetType>
3845 : inline static void
3846 16 : FillFieldListFromBool(const struct ArrowArray *array, int iOGRFieldIdx,
3847 : size_t nOffsettedIndex,
3848 : const struct ArrowArray *childArray, OGRFeature &oFeature)
3849 : {
3850 16 : const auto panOffsets =
3851 16 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3852 : nOffsettedIndex;
3853 32 : std::vector<int> aValues;
3854 16 : const auto *paValues = static_cast<const uint8_t *>(childArray->buffers[1]);
3855 16 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3856 48 : i < static_cast<size_t>(panOffsets[1]); ++i)
3857 : {
3858 32 : aValues.push_back(TestBit(paValues, i) ? 1 : 0);
3859 : }
3860 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3861 16 : aValues.data());
3862 16 : }
3863 :
3864 : /************************************************************************/
3865 : /* FillFieldListFromHalfFloat() */
3866 : /************************************************************************/
3867 :
3868 : template <typename ListOffsetType>
3869 8 : inline static void FillFieldListFromHalfFloat(
3870 : const struct ArrowArray *array, int iOGRFieldIdx, size_t nOffsettedIndex,
3871 : const struct ArrowArray *childArray, OGRFeature &oFeature)
3872 : {
3873 8 : const auto panOffsets =
3874 8 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3875 : nOffsettedIndex;
3876 16 : std::vector<double> aValues;
3877 8 : const auto *paValues =
3878 8 : static_cast<const uint16_t *>(childArray->buffers[1]);
3879 8 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3880 24 : i < static_cast<size_t>(panOffsets[1]); ++i)
3881 : {
3882 16 : const auto nFloat16AsUInt32 = CPLHalfToFloat(paValues[i]);
3883 : float f;
3884 16 : memcpy(&f, &nFloat16AsUInt32, sizeof(f));
3885 16 : aValues.push_back(f);
3886 : }
3887 8 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3888 8 : aValues.data());
3889 8 : }
3890 :
3891 : /************************************************************************/
3892 : /* FillFieldListFromString() */
3893 : /************************************************************************/
3894 :
3895 : template <typename ListOffsetType, typename StringOffsetType>
3896 32 : inline static void FillFieldListFromString(const struct ArrowArray *array,
3897 : int iOGRFieldIdx,
3898 : size_t nOffsettedIndex,
3899 : const struct ArrowArray *childArray,
3900 : OGRFeature &oFeature)
3901 : {
3902 32 : const auto panOffsets =
3903 32 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3904 : nOffsettedIndex;
3905 64 : CPLStringList aosVals;
3906 32 : const auto panSubOffsets =
3907 32 : static_cast<const StringOffsetType *>(childArray->buffers[1]);
3908 32 : const char *pszValues = static_cast<const char *>(childArray->buffers[2]);
3909 64 : std::string osTmp;
3910 90 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3911 90 : i < static_cast<size_t>(panOffsets[1]); ++i)
3912 : {
3913 58 : osTmp.assign(
3914 58 : pszValues + panSubOffsets[i],
3915 58 : static_cast<size_t>(panSubOffsets[i + 1] - panSubOffsets[i]));
3916 58 : aosVals.AddString(osTmp.c_str());
3917 : }
3918 32 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
3919 32 : }
3920 :
3921 : /************************************************************************/
3922 : /* FillFieldFixedSizeList() */
3923 : /************************************************************************/
3924 :
3925 : template <typename ArrowType, typename OGRType = ArrowType>
3926 120 : inline static void FillFieldFixedSizeList(
3927 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
3928 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
3929 : {
3930 240 : std::vector<OGRType> aValues;
3931 120 : const auto *paValues =
3932 120 : static_cast<const ArrowType *>(childArray->buffers[1]) +
3933 120 : childArray->offset + nOffsettedIndex * nItems;
3934 360 : for (int i = 0; i < nItems; ++i)
3935 : {
3936 240 : aValues.push_back(static_cast<OGRType>(paValues[i]));
3937 : }
3938 120 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3939 : aValues.data());
3940 120 : }
3941 :
3942 : /************************************************************************/
3943 : /* FillFieldFixedSizeListString() */
3944 : /************************************************************************/
3945 :
3946 : template <typename StringOffsetType>
3947 17 : inline static void FillFieldFixedSizeListString(
3948 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
3949 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
3950 : {
3951 34 : CPLStringList aosVals;
3952 17 : const auto panSubOffsets =
3953 17 : static_cast<const StringOffsetType *>(childArray->buffers[1]) +
3954 17 : childArray->offset + nOffsettedIndex * nItems;
3955 17 : const char *pszValues = static_cast<const char *>(childArray->buffers[2]);
3956 34 : std::string osTmp;
3957 51 : for (int i = 0; i < nItems; ++i)
3958 : {
3959 34 : osTmp.assign(
3960 34 : pszValues + panSubOffsets[i],
3961 34 : static_cast<size_t>(panSubOffsets[i + 1] - panSubOffsets[i]));
3962 34 : aosVals.AddString(osTmp.c_str());
3963 : }
3964 17 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
3965 17 : }
3966 :
3967 : /************************************************************************/
3968 : /* GetValue() */
3969 : /************************************************************************/
3970 :
/** Return the value at logical index iFeature of a fixed-width array.
 *
 * @tparam ArrowType Type of the values stored in array->buffers[1].
 * @param array Array to read from.
 * @param iFeature Index of the value, not including array->offset.
 */
template <typename ArrowType>
inline static ArrowType GetValue(const struct ArrowArray *array,
                                 size_t iFeature)
{
    const auto nPhysicalIdx = iFeature + array->offset;
    return static_cast<const ArrowType *>(array->buffers[1])[nPhysicalIdx];
}
3978 :
3979 12 : template <> bool GetValue<bool>(const struct ArrowArray *array, size_t iFeature)
3980 : {
3981 12 : const auto *pabyValues = static_cast<const uint8_t *>(array->buffers[1]);
3982 12 : return TestBit(pabyValues, iFeature + static_cast<size_t>(array->offset));
3983 : }
3984 :
3985 : /************************************************************************/
3986 : /* GetValueFloat16() */
3987 : /************************************************************************/
3988 :
3989 23 : static float GetValueFloat16(const struct ArrowArray *array, const size_t nIdx)
3990 : {
3991 23 : const auto *panValues = static_cast<const uint16_t *>(array->buffers[1]);
3992 : const auto nFloat16AsUInt32 =
3993 23 : CPLHalfToFloat(panValues[nIdx + array->offset]);
3994 : float f;
3995 23 : memcpy(&f, &nFloat16AsUInt32, sizeof(f));
3996 23 : return f;
3997 : }
3998 :
3999 : /************************************************************************/
4000 : /* GetValueDecimal() */
4001 : /************************************************************************/
4002 :
4003 71 : static double GetValueDecimal(const struct ArrowArray *array,
4004 : const int nWidthIn64BitWord, const int nScale,
4005 : const size_t nIdx)
4006 : {
4007 : #ifdef CPL_LSB
4008 71 : const auto nIdxIn64BitWord = nIdx * nWidthIn64BitWord;
4009 : #else
4010 : const auto nIdxIn64BitWord =
4011 : nIdx * nWidthIn64BitWord + nWidthIn64BitWord - 1;
4012 : #endif
4013 71 : const auto *panValues = static_cast<const int64_t *>(array->buffers[1]);
4014 71 : const auto nVal =
4015 71 : panValues[nIdxIn64BitWord + array->offset * nWidthIn64BitWord];
4016 71 : return static_cast<double>(nVal) * std::pow(10.0, -nScale);
4017 : }
4018 :
4019 : /************************************************************************/
4020 : /* GetString() */
4021 : /************************************************************************/
4022 :
/** Return the string at logical index nIdx of a string array.
 *
 * @tparam OffsetType Type of the offsets stored in array->buffers[1].
 * @param array String array to read from.
 * @param nIdx Index of the string, not including array->offset.
 */
template <class OffsetType>
static std::string GetString(const struct ArrowArray *array, const size_t nIdx)
{
    const size_t nPhysicalIdx = static_cast<size_t>(array->offset) + nIdx;
    const OffsetType *panAllOffsets =
        static_cast<const OffsetType *>(array->buffers[1]);
    const OffsetType nBegin = panAllOffsets[nPhysicalIdx];
    const OffsetType nEnd = panAllOffsets[nPhysicalIdx + 1];
    const char *pachData = static_cast<const char *>(array->buffers[2]);

    // The string is delimited by two consecutive offsets, and is not
    // nul-terminated in the data buffer.
    std::string osResult;
    osResult.assign(pachData + static_cast<size_t>(nBegin),
                    static_cast<size_t>(nEnd - nBegin));
    return osResult;
}
4035 :
4036 : /************************************************************************/
4037 : /* GetBinaryAsBase64() */
4038 : /************************************************************************/
4039 :
4040 : template <class OffsetType>
4041 8 : static std::string GetBinaryAsBase64(const struct ArrowArray *array,
4042 : const size_t nIdx)
4043 : {
4044 8 : const OffsetType *panOffsets =
4045 8 : static_cast<const OffsetType *>(array->buffers[1]) +
4046 8 : static_cast<size_t>(array->offset) + nIdx;
4047 8 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
4048 8 : const size_t nLen = static_cast<size_t>(panOffsets[1] - panOffsets[0]);
4049 8 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
4050 : {
4051 0 : CPLError(CE_Failure, CPLE_AppDefined, "Too large binary");
4052 0 : return std::string();
4053 : }
4054 16 : char *pszVal = CPLBase64Encode(
4055 8 : static_cast<int>(nLen), pabyData + static_cast<size_t>(panOffsets[0]));
4056 16 : std::string osStr(pszVal);
4057 8 : CPLFree(pszVal);
4058 8 : return osStr;
4059 : }
4060 :
4061 : /************************************************************************/
4062 : /* GetValueFixedWithBinaryAsBase64() */
4063 : /************************************************************************/
4064 :
4065 : static std::string
4066 4 : GetValueFixedWithBinaryAsBase64(const struct ArrowArray *array,
4067 : const int nWidth, const size_t nIdx)
4068 : {
4069 4 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[1]);
4070 8 : char *pszVal = CPLBase64Encode(
4071 : nWidth,
4072 4 : pabyData + (static_cast<size_t>(array->offset) + nIdx) * nWidth);
4073 4 : std::string osStr(pszVal);
4074 4 : CPLFree(pszVal);
4075 4 : return osStr;
4076 : }
4077 :
4078 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4079 : const struct ArrowArray *array,
4080 : const size_t nIdx);
4081 :
4082 : /************************************************************************/
4083 : /* AddToArray() */
4084 : /************************************************************************/
4085 :
4086 142 : static void AddToArray(CPLJSONArray &oArray, const struct ArrowSchema *schema,
4087 : const struct ArrowArray *array, const size_t nIdx)
4088 : {
4089 142 : if (IsBoolean(schema->format))
4090 7 : oArray.Add(GetValue<bool>(array, nIdx));
4091 135 : else if (IsUInt8(schema->format))
4092 13 : oArray.Add(GetValue<uint8_t>(array, nIdx));
4093 122 : else if (IsInt8(schema->format))
4094 7 : oArray.Add(GetValue<int8_t>(array, nIdx));
4095 115 : else if (IsUInt16(schema->format))
4096 7 : oArray.Add(GetValue<uint16_t>(array, nIdx));
4097 108 : else if (IsInt16(schema->format))
4098 7 : oArray.Add(GetValue<int16_t>(array, nIdx));
4099 101 : else if (IsUInt32(schema->format))
4100 7 : oArray.Add(static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
4101 94 : else if (IsInt32(schema->format))
4102 7 : oArray.Add(GetValue<int32_t>(array, nIdx));
4103 87 : else if (IsUInt64(schema->format))
4104 7 : oArray.Add(GetValue<uint64_t>(array, nIdx));
4105 80 : else if (IsInt64(schema->format))
4106 7 : oArray.Add(static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
4107 73 : else if (IsFloat16(schema->format))
4108 7 : oArray.Add(GetValueFloat16(array, nIdx));
4109 66 : else if (IsFloat32(schema->format))
4110 7 : oArray.Add(GetValue<float>(array, nIdx));
4111 59 : else if (IsFloat64(schema->format))
4112 7 : oArray.Add(GetValue<double>(array, nIdx));
4113 52 : else if (IsString(schema->format))
4114 13 : oArray.Add(GetString<uint32_t>(array, nIdx));
4115 39 : else if (IsLargeString(schema->format))
4116 4 : oArray.Add(GetString<uint64_t>(array, nIdx));
4117 35 : else if (IsBinary(schema->format))
4118 2 : oArray.Add(GetBinaryAsBase64<uint32_t>(array, nIdx));
4119 33 : else if (IsLargeBinary(schema->format))
4120 2 : oArray.Add(GetBinaryAsBase64<uint64_t>(array, nIdx));
4121 31 : else if (IsFixedWidthBinary(schema->format))
4122 2 : oArray.Add(GetValueFixedWithBinaryAsBase64(
4123 2 : array, GetFixedWithBinary(schema->format), nIdx));
4124 29 : else if (IsDecimal(schema->format))
4125 : {
4126 7 : int nPrecision = 0;
4127 7 : int nScale = 0;
4128 7 : int nWidthInBytes = 0;
4129 7 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
4130 7 : nWidthInBytes);
4131 : // Already validated
4132 7 : CPLAssert(bOK);
4133 7 : CPL_IGNORE_RET_VAL(bOK);
4134 7 : oArray.Add(GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
4135 : }
4136 : else
4137 22 : oArray.Add(GetObjectAsJSON(schema, array, nIdx));
4138 142 : }
4139 :
4140 : /************************************************************************/
4141 : /* GetListAsJSON() */
4142 : /************************************************************************/
4143 :
4144 : template <class OffsetType>
4145 112 : static CPLJSONArray GetListAsJSON(const struct ArrowSchema *schema,
4146 : const struct ArrowArray *array,
4147 : const size_t nIdx)
4148 : {
4149 112 : CPLJSONArray oArray;
4150 112 : const auto panOffsets = static_cast<const OffsetType *>(array->buffers[1]) +
4151 112 : array->offset + nIdx;
4152 112 : const auto childSchema = schema->children[0];
4153 112 : const auto childArray = array->children[0];
4154 5 : const uint8_t *pabyValidity =
4155 112 : childArray->null_count == 0
4156 : ? nullptr
4157 107 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4158 278 : for (size_t k = static_cast<size_t>(panOffsets[0]);
4159 278 : k < static_cast<size_t>(panOffsets[1]); k++)
4160 : {
4161 318 : if (!pabyValidity ||
4162 152 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4163 : {
4164 136 : AddToArray(oArray, childSchema, childArray, k);
4165 : }
4166 : else
4167 : {
4168 30 : oArray.AddNull();
4169 : }
4170 : }
4171 112 : return oArray;
4172 : }
4173 :
4174 : /************************************************************************/
4175 : /* GetFixedSizeListAsJSON() */
4176 : /************************************************************************/
4177 :
4178 3 : static CPLJSONArray GetFixedSizeListAsJSON(const struct ArrowSchema *schema,
4179 : const struct ArrowArray *array,
4180 : const size_t nIdx)
4181 : {
4182 3 : CPLJSONArray oArray;
4183 3 : const int nVals = GetFixedSizeList(schema->format);
4184 3 : const auto childSchema = schema->children[0];
4185 3 : const auto childArray = array->children[0];
4186 3 : const uint8_t *pabyValidity =
4187 3 : childArray->null_count == 0
4188 3 : ? nullptr
4189 3 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4190 9 : for (size_t k = nIdx * nVals; k < (nIdx + 1) * nVals; k++)
4191 : {
4192 12 : if (!pabyValidity ||
4193 6 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4194 : {
4195 6 : AddToArray(oArray, childSchema, childArray, k);
4196 : }
4197 : else
4198 : {
4199 0 : oArray.AddNull();
4200 : }
4201 : }
4202 3 : return oArray;
4203 : }
4204 :
4205 : /************************************************************************/
4206 : /* AddToDict() */
4207 : /************************************************************************/
4208 :
4209 198 : static void AddToDict(CPLJSONObject &oDict, const std::string &osKey,
4210 : const struct ArrowSchema *schema,
4211 : const struct ArrowArray *array, const size_t nIdx)
4212 : {
4213 198 : if (IsBoolean(schema->format))
4214 5 : oDict.Add(osKey, GetValue<bool>(array, nIdx));
4215 193 : else if (IsUInt8(schema->format))
4216 5 : oDict.Add(osKey, GetValue<uint8_t>(array, nIdx));
4217 188 : else if (IsInt8(schema->format))
4218 5 : oDict.Add(osKey, GetValue<int8_t>(array, nIdx));
4219 183 : else if (IsUInt16(schema->format))
4220 5 : oDict.Add(osKey, GetValue<uint16_t>(array, nIdx));
4221 178 : else if (IsInt16(schema->format))
4222 5 : oDict.Add(osKey, GetValue<int16_t>(array, nIdx));
4223 173 : else if (IsUInt32(schema->format))
4224 2 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
4225 171 : else if (IsInt32(schema->format))
4226 6 : oDict.Add(osKey, GetValue<int32_t>(array, nIdx));
4227 165 : else if (IsUInt64(schema->format))
4228 5 : oDict.Add(osKey, GetValue<uint64_t>(array, nIdx));
4229 160 : else if (IsInt64(schema->format))
4230 22 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
4231 138 : else if (IsFloat16(schema->format))
4232 2 : oDict.Add(osKey, GetValueFloat16(array, nIdx));
4233 136 : else if (IsFloat32(schema->format))
4234 5 : oDict.Add(osKey, GetValue<float>(array, nIdx));
4235 131 : else if (IsFloat64(schema->format))
4236 19 : oDict.Add(osKey, GetValue<double>(array, nIdx));
4237 112 : else if (IsString(schema->format))
4238 14 : oDict.Add(osKey, GetString<uint32_t>(array, nIdx));
4239 98 : else if (IsLargeString(schema->format))
4240 2 : oDict.Add(osKey, GetString<uint64_t>(array, nIdx));
4241 96 : else if (IsBinary(schema->format))
4242 2 : oDict.Add(osKey, GetBinaryAsBase64<uint32_t>(array, nIdx));
4243 94 : else if (IsLargeBinary(schema->format))
4244 2 : oDict.Add(osKey, GetBinaryAsBase64<uint64_t>(array, nIdx));
4245 92 : else if (IsFixedWidthBinary(schema->format))
4246 2 : oDict.Add(osKey, GetValueFixedWithBinaryAsBase64(
4247 2 : array, GetFixedWithBinary(schema->format), nIdx));
4248 90 : else if (IsDecimal(schema->format))
4249 : {
4250 8 : int nPrecision = 0;
4251 8 : int nScale = 0;
4252 8 : int nWidthInBytes = 0;
4253 8 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
4254 8 : nWidthInBytes);
4255 : // Already validated
4256 8 : CPLAssert(bOK);
4257 8 : CPL_IGNORE_RET_VAL(bOK);
4258 8 : oDict.Add(osKey,
4259 : GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
4260 : }
4261 : else
4262 82 : oDict.Add(osKey, GetObjectAsJSON(schema, array, nIdx));
4263 198 : }
4264 :
4265 : /************************************************************************/
4266 : /* GetMapAsJSON() */
4267 : /************************************************************************/
4268 :
4269 243 : static CPLJSONObject GetMapAsJSON(const struct ArrowSchema *schema,
4270 : const struct ArrowArray *array,
4271 : const size_t nIdx)
4272 : {
4273 243 : const auto schemaStruct = schema->children[0];
4274 243 : if (!IsStructure(schemaStruct->format))
4275 : {
4276 0 : CPLError(CE_Failure, CPLE_AppDefined,
4277 : "GetMapAsJSON(): !IsStructure(schemaStruct->format))");
4278 0 : return CPLJSONObject();
4279 : }
4280 243 : const auto schemaKey = schemaStruct->children[0];
4281 243 : const auto schemaValues = schemaStruct->children[1];
4282 243 : if (!IsString(schemaKey->format))
4283 : {
4284 0 : CPLError(CE_Failure, CPLE_AppDefined,
4285 : "GetMapAsJSON(): !IsString(schemaKey->format))");
4286 0 : return CPLJSONObject();
4287 : }
4288 243 : const auto arrayKeys = array->children[0]->children[0];
4289 243 : const auto arrayValues = array->children[0]->children[1];
4290 :
4291 486 : CPLJSONObject oDict;
4292 243 : const auto panOffsets =
4293 243 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset + nIdx;
4294 243 : const uint8_t *pabyValidityKeys =
4295 243 : arrayKeys->null_count == 0
4296 243 : ? nullptr
4297 0 : : static_cast<const uint8_t *>(arrayKeys->buffers[0]);
4298 243 : const uint32_t *panOffsetsKeys =
4299 243 : static_cast<const uint32_t *>(arrayKeys->buffers[1]) +
4300 243 : arrayKeys->offset;
4301 243 : const char *pabyKeys = static_cast<const char *>(arrayKeys->buffers[2]);
4302 243 : const uint8_t *pabyValidityValues =
4303 243 : arrayValues->null_count == 0
4304 243 : ? nullptr
4305 237 : : static_cast<const uint8_t *>(arrayValues->buffers[0]);
4306 463 : for (uint32_t k = panOffsets[0]; k < panOffsets[1]; k++)
4307 : {
4308 220 : if (!pabyValidityKeys ||
4309 0 : TestBit(pabyValidityKeys,
4310 0 : k + static_cast<size_t>(arrayKeys->offset)))
4311 : {
4312 440 : std::string osKey;
4313 220 : osKey.assign(pabyKeys + panOffsetsKeys[k],
4314 220 : panOffsetsKeys[k + 1] - panOffsetsKeys[k]);
4315 :
4316 433 : if (!pabyValidityValues ||
4317 213 : TestBit(pabyValidityValues,
4318 213 : k + static_cast<size_t>(arrayValues->offset)))
4319 : {
4320 168 : AddToDict(oDict, osKey, schemaValues, arrayValues, k);
4321 : }
4322 : else
4323 : {
4324 52 : oDict.AddNull(osKey);
4325 : }
4326 : }
4327 : }
4328 243 : return oDict;
4329 : }
4330 :
4331 : /************************************************************************/
4332 : /* GetStructureAsJSON() */
4333 : /************************************************************************/
4334 :
4335 16 : static CPLJSONObject GetStructureAsJSON(const struct ArrowSchema *schema,
4336 : const struct ArrowArray *array,
4337 : const size_t nIdx)
4338 : {
4339 16 : CPLJSONObject oDict;
4340 62 : for (int64_t k = 0; k < array->n_children; k++)
4341 : {
4342 46 : const uint8_t *pabyValidityValues =
4343 46 : array->children[k]->null_count == 0
4344 46 : ? nullptr
4345 36 : : static_cast<const uint8_t *>(array->children[k]->buffers[0]);
4346 82 : if (!pabyValidityValues ||
4347 36 : TestBit(pabyValidityValues,
4348 36 : nIdx + static_cast<size_t>(array->children[k]->offset)))
4349 : {
4350 30 : AddToDict(oDict, schema->children[k]->name, schema->children[k],
4351 30 : array->children[k], nIdx);
4352 : }
4353 : else
4354 : {
4355 16 : oDict.AddNull(schema->children[k]->name);
4356 : }
4357 : }
4358 16 : return oDict;
4359 : }
4360 :
4361 : /************************************************************************/
4362 : /* GetObjectAsJSON() */
4363 : /************************************************************************/
4364 :
4365 104 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4366 : const struct ArrowArray *array,
4367 : const size_t nIdx)
4368 : {
4369 104 : if (IsMap(schema->format))
4370 4 : return GetMapAsJSON(schema, array, nIdx);
4371 100 : else if (IsList(schema->format))
4372 156 : return GetListAsJSON<uint32_t>(schema, array, nIdx);
4373 22 : else if (IsLargeList(schema->format))
4374 6 : return GetListAsJSON<uint64_t>(schema, array, nIdx);
4375 19 : else if (IsFixedSizeList(schema->format))
4376 6 : return GetFixedSizeListAsJSON(schema, array, nIdx);
4377 16 : else if (IsStructure(schema->format))
4378 16 : return GetStructureAsJSON(schema, array, nIdx);
4379 : else
4380 : {
4381 0 : CPLError(CE_Failure, CPLE_AppDefined,
4382 : "GetObjectAsJSON(): unhandled value format: %s",
4383 0 : schema->format);
4384 0 : return CPLJSONObject();
4385 : }
4386 : }
4387 :
4388 : /************************************************************************/
4389 : /* SetFieldForOtherFormats() */
4390 : /************************************************************************/
4391 :
4392 856 : static bool SetFieldForOtherFormats(OGRFeature &oFeature,
4393 : const int iOGRFieldIndex,
4394 : const size_t nOffsettedIndex,
4395 : const struct ArrowSchema *schema,
4396 : const struct ArrowArray *array)
4397 : {
4398 856 : const char *format = schema->format;
4399 856 : if (IsFloat16(format))
4400 : {
4401 4 : oFeature.SetField(
4402 : iOGRFieldIndex,
4403 4 : GetValueFloat16(array, nOffsettedIndex -
4404 4 : static_cast<size_t>(array->offset)));
4405 : }
4406 :
4407 852 : else if (IsFixedWidthBinary(format))
4408 : {
4409 : // Fixed width binary
4410 17 : const int nWidth = GetFixedWithBinary(format);
4411 17 : oFeature.SetField(iOGRFieldIndex, nWidth,
4412 17 : static_cast<const GByte *>(array->buffers[1]) +
4413 17 : nOffsettedIndex * nWidth);
4414 : }
4415 835 : else if (format[0] == 't' && format[1] == 'd' &&
4416 38 : format[2] == 'D') // strcmp(format, "tdD") == 0
4417 : {
4418 : // date32[days]
4419 : // number of days since Epoch
4420 33 : int64_t timestamp = static_cast<int64_t>(static_cast<const int32_t *>(
4421 33 : array->buffers[1])[nOffsettedIndex]) *
4422 : 3600 * 24;
4423 : struct tm dt;
4424 33 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4425 33 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4426 : dt.tm_mday, 0, 0, 0);
4427 33 : return true;
4428 : }
4429 802 : else if (format[0] == 't' && format[1] == 'd' &&
4430 5 : format[2] == 'm') // strcmp(format, "tdm") == 0
4431 : {
4432 : // date64[milliseconds]
4433 : // number of milliseconds since Epoch
4434 5 : int64_t timestamp =
4435 5 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex] /
4436 : 1000;
4437 : struct tm dt;
4438 5 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4439 5 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4440 5 : dt.tm_mday, 0, 0, 0);
4441 : }
4442 797 : else if (format[0] == 't' && format[1] == 't' &&
4443 39 : format[2] == 's') // strcmp(format, "tts") == 0
4444 : {
4445 : // time32 [seconds]
4446 0 : int32_t value =
4447 0 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4448 0 : const int nHour = value / 3600;
4449 0 : const int nMinute = (value / 60) % 60;
4450 0 : const int nSecond = value % 60;
4451 0 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4452 0 : static_cast<float>(nSecond));
4453 : }
4454 797 : else if (format[0] == 't' && format[1] == 't' &&
4455 39 : format[2] == 'm') // strcmp(format, "ttm") == 0
4456 : {
4457 : // time32 [milliseconds]
4458 25 : int32_t value =
4459 25 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4460 25 : double floatingPart = (value % 1000) / 1e3;
4461 25 : value /= 1000;
4462 25 : const int nHour = value / 3600;
4463 25 : const int nMinute = (value / 60) % 60;
4464 25 : const int nSecond = value % 60;
4465 25 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4466 25 : static_cast<float>(nSecond + floatingPart));
4467 : }
4468 772 : else if (format[0] == 't' && format[1] == 't' &&
4469 14 : (format[2] == 'u' || // time64 [microseconds]
4470 7 : format[2] == 'n')) // time64 [nanoseconds]
4471 : {
4472 14 : oFeature.SetField(iOGRFieldIndex,
4473 14 : static_cast<GIntBig>(static_cast<const int64_t *>(
4474 14 : array->buffers[1])[nOffsettedIndex]));
4475 : }
4476 758 : else if (IsTimestampSeconds(format))
4477 : {
4478 0 : ArrowTimestampToOGRDateTime(
4479 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex], 1,
4480 : GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4481 : }
4482 758 : else if (IsTimestampMilliseconds(format))
4483 : {
4484 73 : ArrowTimestampToOGRDateTime(
4485 73 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4486 : 1000, GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4487 : }
4488 685 : else if (IsTimestampMicroseconds(format))
4489 : {
4490 34 : ArrowTimestampToOGRDateTime(
4491 34 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4492 : 1000 * 1000, GetTimestampTimezone(format), oFeature,
4493 : iOGRFieldIndex);
4494 : }
4495 651 : else if (IsTimestampNanoseconds(format))
4496 : {
4497 0 : ArrowTimestampToOGRDateTime(
4498 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4499 : 1000 * 1000 * 1000, GetTimestampTimezone(format), oFeature,
4500 : iOGRFieldIndex);
4501 : }
4502 651 : else if (IsFixedSizeList(format))
4503 : {
4504 154 : const int nItems = GetFixedSizeList(format);
4505 154 : const auto childArray = array->children[0];
4506 154 : const char *childFormat = schema->children[0]->format;
4507 154 : if (IsBoolean(childFormat))
4508 : {
4509 24 : std::vector<int> aValues;
4510 12 : const auto *paValues =
4511 12 : static_cast<const uint8_t *>(childArray->buffers[1]);
4512 36 : for (int i = 0; i < nItems; ++i)
4513 : {
4514 24 : aValues.push_back(
4515 24 : TestBit(paValues,
4516 24 : static_cast<size_t>(childArray->offset +
4517 24 : nOffsettedIndex * nItems + i))
4518 24 : ? 1
4519 : : 0);
4520 : }
4521 12 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4522 12 : aValues.data());
4523 : }
4524 142 : else if (IsInt8(childFormat))
4525 : {
4526 12 : FillFieldFixedSizeList<int8_t, int>(array, iOGRFieldIndex,
4527 : nOffsettedIndex, nItems,
4528 : childArray, oFeature);
4529 : }
4530 130 : else if (IsUInt8(childFormat))
4531 : {
4532 12 : FillFieldFixedSizeList<uint8_t, int>(array, iOGRFieldIndex,
4533 : nOffsettedIndex, nItems,
4534 : childArray, oFeature);
4535 : }
4536 118 : else if (IsInt16(childFormat))
4537 : {
4538 12 : FillFieldFixedSizeList<int16_t, int>(array, iOGRFieldIndex,
4539 : nOffsettedIndex, nItems,
4540 : childArray, oFeature);
4541 : }
4542 106 : else if (IsUInt16(childFormat))
4543 : {
4544 12 : FillFieldFixedSizeList<uint16_t, int>(array, iOGRFieldIndex,
4545 : nOffsettedIndex, nItems,
4546 : childArray, oFeature);
4547 : }
4548 94 : else if (IsInt32(childFormat))
4549 : {
4550 12 : FillFieldFixedSizeList<int32_t, int>(array, iOGRFieldIndex,
4551 : nOffsettedIndex, nItems,
4552 : childArray, oFeature);
4553 : }
4554 82 : else if (IsUInt32(childFormat))
4555 : {
4556 5 : FillFieldFixedSizeList<uint32_t, GIntBig>(array, iOGRFieldIndex,
4557 : nOffsettedIndex, nItems,
4558 : childArray, oFeature);
4559 : }
4560 77 : else if (IsInt64(childFormat))
4561 : {
4562 19 : FillFieldFixedSizeList<int64_t, GIntBig>(array, iOGRFieldIndex,
4563 : nOffsettedIndex, nItems,
4564 : childArray, oFeature);
4565 : }
4566 58 : else if (IsUInt64(childFormat))
4567 : {
4568 12 : FillFieldFixedSizeList<uint64_t, double>(array, iOGRFieldIndex,
4569 : nOffsettedIndex, nItems,
4570 : childArray, oFeature);
4571 : }
4572 46 : else if (IsFloat16(childFormat))
4573 : {
4574 10 : std::vector<double> aValues;
4575 15 : for (int i = 0; i < nItems; ++i)
4576 : {
4577 10 : aValues.push_back(
4578 10 : GetValueFloat16(childArray, nOffsettedIndex * nItems + i));
4579 : }
4580 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4581 5 : aValues.data());
4582 : }
4583 41 : else if (IsFloat32(childFormat))
4584 : {
4585 12 : FillFieldFixedSizeList<float, double>(array, iOGRFieldIndex,
4586 : nOffsettedIndex, nItems,
4587 : childArray, oFeature);
4588 : }
4589 29 : else if (IsFloat64(childFormat))
4590 : {
4591 12 : FillFieldFixedSizeList<double, double>(array, iOGRFieldIndex,
4592 : nOffsettedIndex, nItems,
4593 : childArray, oFeature);
4594 : }
4595 17 : else if (IsString(childFormat))
4596 : {
4597 12 : FillFieldFixedSizeListString<uint32_t>(array, iOGRFieldIndex,
4598 : nOffsettedIndex, nItems,
4599 : childArray, oFeature);
4600 : }
4601 5 : else if (IsLargeString(childFormat))
4602 : {
4603 5 : FillFieldFixedSizeListString<uint64_t>(array, iOGRFieldIndex,
4604 : nOffsettedIndex, nItems,
4605 : childArray, oFeature);
4606 : }
4607 : }
4608 497 : else if (IsList(format) || IsLargeList(format))
4609 : {
4610 254 : const auto childArray = array->children[0];
4611 254 : const char *childFormat = schema->children[0]->format;
4612 254 : if (IsBoolean(childFormat))
4613 : {
4614 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4615 12 : FillFieldListFromBool<uint32_t>(array, iOGRFieldIndex,
4616 : nOffsettedIndex, childArray,
4617 : oFeature);
4618 : else
4619 4 : FillFieldListFromBool<uint64_t>(array, iOGRFieldIndex,
4620 : nOffsettedIndex, childArray,
4621 : oFeature);
4622 : }
4623 238 : else if (IsInt8(childFormat))
4624 : {
4625 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4626 10 : FillFieldList<uint32_t, int8_t, int>(array, iOGRFieldIndex,
4627 : nOffsettedIndex,
4628 : childArray, oFeature);
4629 : else
4630 4 : FillFieldList<uint64_t, int8_t, int>(array, iOGRFieldIndex,
4631 : nOffsettedIndex,
4632 : childArray, oFeature);
4633 : }
4634 224 : else if (IsUInt8(childFormat))
4635 : {
4636 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4637 14 : FillFieldList<uint32_t, uint8_t, int>(array, iOGRFieldIndex,
4638 : nOffsettedIndex,
4639 : childArray, oFeature);
4640 : else
4641 4 : FillFieldList<uint64_t, uint8_t, int>(array, iOGRFieldIndex,
4642 : nOffsettedIndex,
4643 : childArray, oFeature);
4644 : }
4645 206 : else if (IsInt16(childFormat))
4646 : {
4647 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4648 12 : FillFieldList<uint32_t, int16_t, int>(array, iOGRFieldIndex,
4649 : nOffsettedIndex,
4650 : childArray, oFeature);
4651 : else
4652 4 : FillFieldList<uint64_t, int16_t, int>(array, iOGRFieldIndex,
4653 : nOffsettedIndex,
4654 : childArray, oFeature);
4655 : }
4656 190 : else if (IsUInt16(childFormat))
4657 : {
4658 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4659 10 : FillFieldList<uint32_t, uint16_t, int>(array, iOGRFieldIndex,
4660 : nOffsettedIndex,
4661 : childArray, oFeature);
4662 : else
4663 4 : FillFieldList<uint64_t, uint16_t, int>(array, iOGRFieldIndex,
4664 : nOffsettedIndex,
4665 : childArray, oFeature);
4666 : }
4667 176 : else if (IsInt32(childFormat))
4668 : {
4669 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4670 14 : FillFieldList<uint32_t, int32_t, int>(array, iOGRFieldIndex,
4671 : nOffsettedIndex,
4672 : childArray, oFeature);
4673 : else
4674 4 : FillFieldList<uint64_t, int32_t, int>(array, iOGRFieldIndex,
4675 : nOffsettedIndex,
4676 : childArray, oFeature);
4677 : }
4678 158 : else if (IsUInt32(childFormat))
4679 : {
4680 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
4681 4 : FillFieldList<uint32_t, uint32_t, GIntBig>(
4682 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4683 : oFeature);
4684 : else
4685 4 : FillFieldList<uint64_t, uint32_t, GIntBig>(
4686 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4687 : oFeature);
4688 : }
4689 150 : else if (IsInt64(childFormat))
4690 : {
4691 31 : if (format[1] == ARROW_2ND_LETTER_LIST)
4692 27 : FillFieldList<uint32_t, int64_t, GIntBig>(array, iOGRFieldIndex,
4693 : nOffsettedIndex,
4694 : childArray, oFeature);
4695 : else
4696 4 : FillFieldList<uint64_t, int64_t, GIntBig>(array, iOGRFieldIndex,
4697 : nOffsettedIndex,
4698 : childArray, oFeature);
4699 : }
4700 119 : else if (IsUInt64(childFormat)) // (lossy conversion)
4701 : {
4702 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4703 10 : FillFieldList<uint32_t, uint64_t, double>(array, iOGRFieldIndex,
4704 : nOffsettedIndex,
4705 : childArray, oFeature);
4706 : else
4707 4 : FillFieldList<uint64_t, uint64_t, double>(array, iOGRFieldIndex,
4708 : nOffsettedIndex,
4709 : childArray, oFeature);
4710 : }
4711 105 : else if (IsFloat16(childFormat))
4712 : {
4713 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
4714 4 : FillFieldListFromHalfFloat<uint32_t>(array, iOGRFieldIndex,
4715 : nOffsettedIndex,
4716 : childArray, oFeature);
4717 : else
4718 4 : FillFieldListFromHalfFloat<uint64_t>(array, iOGRFieldIndex,
4719 : nOffsettedIndex,
4720 : childArray, oFeature);
4721 : }
4722 97 : else if (IsFloat32(childFormat))
4723 : {
4724 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4725 12 : FillFieldList<uint32_t, float, double>(array, iOGRFieldIndex,
4726 : nOffsettedIndex,
4727 : childArray, oFeature);
4728 : else
4729 4 : FillFieldList<uint64_t, float, double>(array, iOGRFieldIndex,
4730 : nOffsettedIndex,
4731 : childArray, oFeature);
4732 : }
4733 81 : else if (IsFloat64(childFormat))
4734 : {
4735 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4736 14 : FillFieldList<uint32_t, double, double>(array, iOGRFieldIndex,
4737 : nOffsettedIndex,
4738 : childArray, oFeature);
4739 : else
4740 4 : FillFieldList<uint64_t, double, double>(array, iOGRFieldIndex,
4741 : nOffsettedIndex,
4742 : childArray, oFeature);
4743 : }
4744 63 : else if (IsString(childFormat))
4745 : {
4746 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4747 14 : FillFieldListFromString<uint32_t, uint32_t>(
4748 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4749 : oFeature);
4750 : else
4751 4 : FillFieldListFromString<uint64_t, uint32_t>(
4752 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4753 : oFeature);
4754 : }
4755 45 : else if (IsLargeString(childFormat))
4756 : {
4757 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4758 10 : FillFieldListFromString<uint32_t, uint64_t>(
4759 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4760 : oFeature);
4761 : else
4762 4 : FillFieldListFromString<uint64_t, uint64_t>(
4763 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4764 : oFeature);
4765 : }
4766 31 : else if (format[1] == ARROW_2ND_LETTER_LIST)
4767 : {
4768 31 : const size_t iFeature =
4769 31 : static_cast<size_t>(nOffsettedIndex - array->offset);
4770 31 : oFeature.SetField(iOGRFieldIndex,
4771 62 : GetListAsJSON<uint32_t>(schema, array, iFeature)
4772 62 : .Format(CPLJSONObject::PrettyFormat::Plain)
4773 : .c_str());
4774 : }
4775 : else
4776 : {
4777 0 : const size_t iFeature =
4778 0 : static_cast<size_t>(nOffsettedIndex - array->offset);
4779 0 : oFeature.SetField(iOGRFieldIndex,
4780 0 : GetListAsJSON<uint64_t>(schema, array, iFeature)
4781 0 : .Format(CPLJSONObject::PrettyFormat::Plain)
4782 : .c_str());
4783 : }
4784 : }
4785 243 : else if (IsDecimal(format))
4786 : {
4787 4 : int nPrecision = 0;
4788 4 : int nScale = 0;
4789 4 : int nWidthInBytes = 0;
4790 4 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
4791 : {
4792 0 : CPLAssert(false);
4793 : }
4794 :
4795 : // fits on a int64
4796 4 : CPLAssert(nPrecision <= 19);
4797 : // either 128 or 256 bits
4798 4 : CPLAssert((nWidthInBytes % 8) == 0);
4799 4 : const int nWidthIn64BitWord = nWidthInBytes / 8;
4800 4 : const size_t iFeature =
4801 4 : static_cast<size_t>(nOffsettedIndex - array->offset);
4802 4 : oFeature.SetField(
4803 : iOGRFieldIndex,
4804 : GetValueDecimal(array, nWidthIn64BitWord, nScale, iFeature));
4805 4 : return true;
4806 : }
4807 239 : else if (IsMap(format))
4808 : {
4809 239 : const size_t iFeature =
4810 239 : static_cast<size_t>(nOffsettedIndex - array->offset);
4811 239 : oFeature.SetField(iOGRFieldIndex,
4812 478 : GetMapAsJSON(schema, array, iFeature)
4813 478 : .Format(CPLJSONObject::PrettyFormat::Plain)
4814 : .c_str());
4815 : }
4816 : else
4817 : {
4818 0 : return false;
4819 : }
4820 :
4821 819 : return true;
4822 : }
4823 :
4824 : /************************************************************************/
4825 : /* FillValidityArrayFromAttrQuery() */
4826 : /************************************************************************/
4827 :
4828 134 : static size_t FillValidityArrayFromAttrQuery(
4829 : const OGRLayer *poLayer, OGRFeatureQuery *poAttrQuery,
4830 : const struct ArrowSchema *schema, struct ArrowArray *array,
4831 : std::vector<bool> &abyValidityFromFilters, CSLConstList papszOptions)
4832 : {
4833 134 : size_t nCountIntersecting = 0;
4834 134 : auto poFeatureDefn = const_cast<OGRLayer *>(poLayer)->GetLayerDefn();
4835 268 : OGRFeature oFeature(poFeatureDefn);
4836 :
4837 268 : std::map<std::string, std::vector<int>> oMapFieldNameToArrowPath;
4838 268 : std::vector<int> anArrowPathTmp;
4839 134 : BuildMapFieldNameToArrowPath(schema, oMapFieldNameToArrowPath,
4840 268 : std::string(), anArrowPathTmp);
4841 :
4842 : struct UsedFieldsInfo
4843 : {
4844 : int iOGRFieldIndex{};
4845 : std::vector<int> anArrowPath{};
4846 : };
4847 :
4848 268 : std::vector<UsedFieldsInfo> aoUsedFieldsInfo;
4849 :
4850 134 : bool bNeedsFID = false;
4851 268 : const CPLStringList aosUsedFields(poAttrQuery->GetUsedFields());
4852 252 : for (int i = 0; i < aosUsedFields.size(); ++i)
4853 : {
4854 118 : int iOGRFieldIndex = poFeatureDefn->GetFieldIndex(aosUsedFields[i]);
4855 118 : if (iOGRFieldIndex >= 0)
4856 : {
4857 112 : const auto oIter = oMapFieldNameToArrowPath.find(aosUsedFields[i]);
4858 112 : if (oIter != oMapFieldNameToArrowPath.end())
4859 : {
4860 224 : UsedFieldsInfo info;
4861 112 : info.iOGRFieldIndex = iOGRFieldIndex;
4862 112 : info.anArrowPath = oIter->second;
4863 112 : aoUsedFieldsInfo.push_back(info);
4864 : }
4865 : else
4866 : {
4867 0 : CPLError(CE_Failure, CPLE_AppDefined,
4868 : "Cannot find %s in oMapFieldNameToArrowPath",
4869 : aosUsedFields[i]);
4870 : }
4871 : }
4872 6 : else if (EQUAL(aosUsedFields[i], "FID"))
4873 : {
4874 6 : bNeedsFID = true;
4875 : }
4876 : else
4877 : {
4878 0 : CPLDebug("OGR", "Cannot find used field %s", aosUsedFields[i]);
4879 : }
4880 : }
4881 :
4882 134 : const size_t nLength = abyValidityFromFilters.size();
4883 :
4884 134 : GIntBig nBaseSeqFID = -1;
4885 268 : std::vector<int> anArrowPathToFIDColumn;
4886 134 : if (bNeedsFID)
4887 : {
4888 : // BASE_SEQUENTIAL_FID is set when there is no Arrow column for the FID
4889 : // and we assume sequential FID numbering
4890 : const char *pszBaseSeqFID =
4891 6 : CSLFetchNameValue(papszOptions, "BASE_SEQUENTIAL_FID");
4892 6 : if (pszBaseSeqFID)
4893 : {
4894 5 : nBaseSeqFID = CPLAtoGIntBig(pszBaseSeqFID);
4895 :
4896 : // Optimizimation for "FID = constant"
4897 : swq_expr_node *poNode =
4898 5 : static_cast<swq_expr_node *>(poAttrQuery->GetSWQExpr());
4899 15 : if (poNode->eNodeType == SNT_OPERATION &&
4900 5 : poNode->nOperation == SWQ_EQ && poNode->nSubExprCount == 2 &&
4901 2 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
4902 2 : poNode->papoSubExpr[1]->eNodeType == SNT_CONSTANT &&
4903 2 : poNode->papoSubExpr[0]->field_index ==
4904 12 : poFeatureDefn->GetFieldCount() + SPF_FID &&
4905 2 : poNode->papoSubExpr[1]->field_type == SWQ_INTEGER64)
4906 : {
4907 2 : if (nBaseSeqFID + static_cast<int64_t>(nLength) <
4908 2 : poNode->papoSubExpr[1]->int_value ||
4909 2 : nBaseSeqFID > poNode->papoSubExpr[1]->int_value)
4910 : {
4911 0 : return 0;
4912 : }
4913 : }
4914 : }
4915 : else
4916 : {
4917 : const char *pszFIDColumn =
4918 1 : const_cast<OGRLayer *>(poLayer)->GetFIDColumn();
4919 1 : if (pszFIDColumn && pszFIDColumn[0])
4920 : {
4921 1 : const auto oIter = oMapFieldNameToArrowPath.find(pszFIDColumn);
4922 1 : if (oIter != oMapFieldNameToArrowPath.end())
4923 : {
4924 1 : anArrowPathToFIDColumn = oIter->second;
4925 : }
4926 : }
4927 1 : if (anArrowPathToFIDColumn.empty())
4928 : {
4929 0 : CPLError(CE_Failure, CPLE_AppDefined,
4930 : "Filtering on FID requested but cannot associate a "
4931 : "FID with Arrow records");
4932 : }
4933 : }
4934 : }
4935 :
4936 555 : for (size_t iRow = 0; iRow < nLength; ++iRow)
4937 : {
4938 421 : if (!abyValidityFromFilters[iRow])
4939 2 : continue;
4940 :
4941 419 : if (bNeedsFID)
4942 : {
4943 21 : if (nBaseSeqFID >= 0)
4944 : {
4945 11 : oFeature.SetFID(nBaseSeqFID + iRow);
4946 : }
4947 10 : else if (!anArrowPathToFIDColumn.empty())
4948 : {
4949 10 : oFeature.SetFID(OGRNullFID);
4950 :
4951 10 : const struct ArrowSchema *psSchemaField = schema;
4952 10 : const struct ArrowArray *psArray = array;
4953 10 : bool bSkip = false;
4954 20 : for (size_t i = 0; i < anArrowPathToFIDColumn.size(); ++i)
4955 : {
4956 10 : const int iChild = anArrowPathToFIDColumn[i];
4957 10 : if (i > 0)
4958 : {
4959 0 : const uint8_t *pabyValidity =
4960 0 : psArray->null_count == 0
4961 0 : ? nullptr
4962 : : static_cast<uint8_t *>(
4963 0 : const_cast<void *>(psArray->buffers[0]));
4964 0 : const size_t nOffsettedIndex =
4965 0 : static_cast<size_t>(iRow + psArray->offset);
4966 0 : if (pabyValidity &&
4967 0 : !TestBit(pabyValidity, nOffsettedIndex))
4968 : {
4969 0 : bSkip = true;
4970 0 : break;
4971 : }
4972 : }
4973 :
4974 10 : psSchemaField = psSchemaField->children[iChild];
4975 10 : psArray = psArray->children[iChild];
4976 : }
4977 10 : if (bSkip)
4978 0 : continue;
4979 :
4980 10 : const char *format = psSchemaField->format;
4981 10 : const uint8_t *pabyValidity =
4982 10 : psArray->null_count == 0
4983 10 : ? nullptr
4984 : : static_cast<uint8_t *>(
4985 0 : const_cast<void *>(psArray->buffers[0]));
4986 10 : const size_t nOffsettedIndex =
4987 10 : static_cast<size_t>(iRow + psArray->offset);
4988 10 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
4989 : {
4990 : // do nothing
4991 : }
4992 10 : else if (IsInt32(format))
4993 : {
4994 0 : oFeature.SetFID(static_cast<const int32_t *>(
4995 0 : psArray->buffers[1])[nOffsettedIndex]);
4996 : }
4997 10 : else if (IsInt64(format))
4998 : {
4999 10 : oFeature.SetFID(static_cast<const int64_t *>(
5000 10 : psArray->buffers[1])[nOffsettedIndex]);
5001 : }
5002 : }
5003 : }
5004 :
5005 725 : for (const auto &sInfo : aoUsedFieldsInfo)
5006 : {
5007 306 : const int iOGRFieldIndex = sInfo.iOGRFieldIndex;
5008 306 : const struct ArrowSchema *psSchemaField = schema;
5009 306 : const struct ArrowArray *psArray = array;
5010 306 : bool bSkip = false;
5011 612 : for (size_t i = 0; i < sInfo.anArrowPath.size(); ++i)
5012 : {
5013 306 : const int iChild = sInfo.anArrowPath[i];
5014 306 : if (i > 0)
5015 : {
5016 0 : const uint8_t *pabyValidity =
5017 0 : psArray->null_count == 0
5018 0 : ? nullptr
5019 : : static_cast<uint8_t *>(
5020 0 : const_cast<void *>(psArray->buffers[0]));
5021 0 : const size_t nOffsettedIndex =
5022 0 : static_cast<size_t>(iRow + psArray->offset);
5023 0 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5024 : {
5025 0 : bSkip = true;
5026 0 : oFeature.SetFieldNull(iOGRFieldIndex);
5027 0 : break;
5028 : }
5029 : }
5030 :
5031 306 : psSchemaField = psSchemaField->children[iChild];
5032 306 : psArray = psArray->children[iChild];
5033 : }
5034 306 : if (bSkip)
5035 0 : continue;
5036 :
5037 306 : const char *format = psSchemaField->format;
5038 306 : const uint8_t *pabyValidity =
5039 306 : psArray->null_count == 0
5040 306 : ? nullptr
5041 : : static_cast<uint8_t *>(
5042 129 : const_cast<void *>(psArray->buffers[0]));
5043 306 : const size_t nOffsettedIndex =
5044 306 : static_cast<size_t>(iRow + psArray->offset);
5045 306 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5046 : {
5047 38 : oFeature.SetFieldNull(iOGRFieldIndex);
5048 : }
5049 268 : else if (IsBoolean(format))
5050 : {
5051 78 : oFeature.SetField(
5052 : iOGRFieldIndex,
5053 78 : TestBit(static_cast<const uint8_t *>(psArray->buffers[1]),
5054 : nOffsettedIndex));
5055 : }
5056 190 : else if (IsInt8(format))
5057 : {
5058 8 : oFeature.SetField(iOGRFieldIndex,
5059 8 : static_cast<const int8_t *>(
5060 8 : psArray->buffers[1])[nOffsettedIndex]);
5061 : }
5062 182 : else if (IsUInt8(format))
5063 : {
5064 4 : oFeature.SetField(iOGRFieldIndex,
5065 4 : static_cast<const uint8_t *>(
5066 4 : psArray->buffers[1])[nOffsettedIndex]);
5067 : }
5068 178 : else if (IsInt16(format))
5069 : {
5070 16 : oFeature.SetField(iOGRFieldIndex,
5071 16 : static_cast<const int16_t *>(
5072 16 : psArray->buffers[1])[nOffsettedIndex]);
5073 : }
5074 162 : else if (IsUInt16(format))
5075 : {
5076 2 : oFeature.SetField(iOGRFieldIndex,
5077 2 : static_cast<const uint16_t *>(
5078 2 : psArray->buffers[1])[nOffsettedIndex]);
5079 : }
5080 160 : else if (IsInt32(format))
5081 : {
5082 10 : oFeature.SetField(iOGRFieldIndex,
5083 10 : static_cast<const int32_t *>(
5084 10 : psArray->buffers[1])[nOffsettedIndex]);
5085 : }
5086 150 : else if (IsUInt32(format))
5087 : {
5088 0 : oFeature.SetField(
5089 : iOGRFieldIndex,
5090 0 : static_cast<GIntBig>(static_cast<const uint32_t *>(
5091 0 : psArray->buffers[1])[nOffsettedIndex]));
5092 : }
5093 150 : else if (IsInt64(format))
5094 : {
5095 4 : oFeature.SetField(
5096 : iOGRFieldIndex,
5097 4 : static_cast<GIntBig>(static_cast<const int64_t *>(
5098 4 : psArray->buffers[1])[nOffsettedIndex]));
5099 : }
5100 146 : else if (IsUInt64(format))
5101 : {
5102 4 : oFeature.SetField(
5103 : iOGRFieldIndex,
5104 4 : static_cast<double>(static_cast<const uint64_t *>(
5105 4 : psArray->buffers[1])[nOffsettedIndex]));
5106 : }
5107 142 : else if (IsFloat32(format))
5108 : {
5109 2 : oFeature.SetField(iOGRFieldIndex,
5110 2 : static_cast<const float *>(
5111 2 : psArray->buffers[1])[nOffsettedIndex]);
5112 : }
5113 140 : else if (IsFloat64(format))
5114 : {
5115 26 : oFeature.SetField(iOGRFieldIndex,
5116 26 : static_cast<const double *>(
5117 26 : psArray->buffers[1])[nOffsettedIndex]);
5118 : }
5119 114 : else if (IsString(format))
5120 : {
5121 18 : const auto nOffset = static_cast<const uint32_t *>(
5122 18 : psArray->buffers[1])[nOffsettedIndex];
5123 18 : const auto nNextOffset = static_cast<const uint32_t *>(
5124 18 : psArray->buffers[1])[nOffsettedIndex + 1];
5125 18 : const GByte *pabyData =
5126 18 : static_cast<const GByte *>(psArray->buffers[2]);
5127 18 : const uint32_t nSize = nNextOffset - nOffset;
5128 18 : CPLAssert(oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() ==
5129 : OFTString);
5130 18 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5131 18 : memcpy(pszStr, pabyData + nOffset, nSize);
5132 18 : pszStr[nSize] = 0;
5133 18 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5134 18 : if (IsValidField(psField))
5135 12 : CPLFree(psField->String);
5136 18 : psField->String = pszStr;
5137 : }
5138 96 : else if (IsLargeString(format))
5139 : {
5140 6 : const auto nOffset = static_cast<const uint64_t *>(
5141 6 : psArray->buffers[1])[nOffsettedIndex];
5142 6 : const auto nNextOffset = static_cast<const uint64_t *>(
5143 6 : psArray->buffers[1])[nOffsettedIndex + 1];
5144 6 : const GByte *pabyData =
5145 6 : static_cast<const GByte *>(psArray->buffers[2]);
5146 6 : const size_t nSize = static_cast<size_t>(nNextOffset - nOffset);
5147 6 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5148 6 : memcpy(pszStr, pabyData + static_cast<size_t>(nOffset), nSize);
5149 6 : pszStr[nSize] = 0;
5150 6 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5151 6 : if (IsValidField(psField))
5152 3 : CPLFree(psField->String);
5153 6 : psField->String = pszStr;
5154 : }
5155 90 : else if (IsBinary(format))
5156 : {
5157 5 : const auto nOffset = static_cast<const uint32_t *>(
5158 5 : psArray->buffers[1])[nOffsettedIndex];
5159 5 : const auto nNextOffset = static_cast<const uint32_t *>(
5160 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5161 5 : const GByte *pabyData =
5162 5 : static_cast<const GByte *>(psArray->buffers[2]);
5163 5 : const uint32_t nSize = nNextOffset - nOffset;
5164 10 : if (nSize >
5165 5 : static_cast<size_t>(std::numeric_limits<int32_t>::max()))
5166 : {
5167 0 : abyValidityFromFilters.clear();
5168 0 : abyValidityFromFilters.resize(nLength);
5169 0 : CPLError(CE_Failure, CPLE_AppDefined,
5170 : "Unexpected error in PostFilterArrowArray(): too "
5171 : "large binary");
5172 0 : return 0;
5173 : }
5174 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5175 5 : pabyData + nOffset);
5176 : }
5177 85 : else if (IsLargeBinary(format))
5178 : {
5179 5 : const auto nOffset = static_cast<const uint64_t *>(
5180 5 : psArray->buffers[1])[nOffsettedIndex];
5181 5 : const auto nNextOffset = static_cast<const uint64_t *>(
5182 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5183 5 : const GByte *pabyData =
5184 5 : static_cast<const GByte *>(psArray->buffers[2]);
5185 5 : const uint64_t nSize = nNextOffset - nOffset;
5186 5 : if (nSize >
5187 5 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
5188 : {
5189 0 : abyValidityFromFilters.clear();
5190 0 : abyValidityFromFilters.resize(nLength);
5191 0 : CPLError(CE_Failure, CPLE_AppDefined,
5192 : "Unexpected error in PostFilterArrowArray(): too "
5193 : "large binary");
5194 0 : return 0;
5195 : }
5196 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5197 5 : pabyData + nOffset);
5198 : }
5199 80 : else if (!SetFieldForOtherFormats(oFeature, iOGRFieldIndex,
5200 : nOffsettedIndex, psSchemaField,
5201 : psArray))
5202 : {
5203 0 : abyValidityFromFilters.clear();
5204 0 : abyValidityFromFilters.resize(nLength);
5205 0 : CPLError(
5206 : CE_Failure, CPLE_AppDefined,
5207 : "Unexpected error in PostFilterArrowArray(): unhandled "
5208 : "field format: %s",
5209 : format);
5210 0 : return 0;
5211 : }
5212 : }
5213 419 : if (poAttrQuery->Evaluate(&oFeature))
5214 : {
5215 215 : nCountIntersecting++;
5216 : }
5217 : else
5218 : {
5219 204 : abyValidityFromFilters[iRow] = false;
5220 : }
5221 : }
5222 134 : return nCountIntersecting;
5223 : }
5224 :
5225 : /************************************************************************/
5226 : /* OGRLayer::PostFilterArrowArray() */
5227 : /************************************************************************/
5228 :
5229 : /** Remove rows that aren't selected by the spatial or attribute filter.
5230 : *
5231 : * Assumes that CanPostFilterArrowArray() has been called and returned true.
5232 : */
5233 153 : void OGRLayer::PostFilterArrowArray(const struct ArrowSchema *schema,
5234 : struct ArrowArray *array,
5235 : CSLConstList papszOptions) const
5236 : {
5237 153 : if (!m_poFilterGeom && !m_poAttrQuery)
5238 43 : return;
5239 :
5240 153 : CPLAssert(schema->n_children == array->n_children);
5241 :
5242 153 : int64_t iGeomField = -1;
5243 153 : if (m_poFilterGeom)
5244 : {
5245 : const char *pszGeomFieldName =
5246 : const_cast<OGRLayer *>(this)
5247 21 : ->GetLayerDefn()
5248 21 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
5249 21 : ->GetNameRef();
5250 837 : for (int64_t iField = 0; iField < schema->n_children; ++iField)
5251 : {
5252 837 : const auto fieldSchema = schema->children[iField];
5253 837 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
5254 : {
5255 21 : iGeomField = iField;
5256 21 : break;
5257 : }
5258 816 : CPLAssert(array->children[iField]->length ==
5259 : array->children[0]->length);
5260 : }
5261 : // Guaranteed if CanPostFilterArrowArray() returned true
5262 21 : CPLAssert(iGeomField >= 0);
5263 21 : CPLAssert(IsBinary(schema->children[iGeomField]->format) ||
5264 : IsLargeBinary(schema->children[iGeomField]->format));
5265 21 : CPLAssert(array->children[iGeomField]->n_buffers == 3);
5266 : }
5267 :
5268 153 : std::vector<bool> abyValidityFromFilters;
5269 153 : const size_t nLength = static_cast<size_t>(array->length);
5270 : const size_t nCountIntersectingGeom =
5271 174 : m_poFilterGeom ? (IsBinary(schema->children[iGeomField]->format)
5272 42 : ? FillValidityArrayFromWKBArray<uint32_t>(
5273 21 : array->children[iGeomField], this,
5274 : abyValidityFromFilters)
5275 0 : : FillValidityArrayFromWKBArray<uint64_t>(
5276 0 : array->children[iGeomField], this,
5277 : abyValidityFromFilters))
5278 153 : : nLength;
5279 153 : if (!m_poFilterGeom)
5280 132 : abyValidityFromFilters.resize(nLength, true);
5281 : const size_t nCountIntersecting =
5282 134 : m_poAttrQuery && nCountIntersectingGeom > 0
5283 306 : ? FillValidityArrayFromAttrQuery(this, m_poAttrQuery, schema, array,
5284 : abyValidityFromFilters,
5285 : papszOptions)
5286 19 : : m_poFilterGeom ? nCountIntersectingGeom
5287 153 : : nLength;
5288 : // Nothing to do ?
5289 153 : if (nCountIntersecting == nLength)
5290 : {
5291 : // CPLDebug("OGR", "All rows match filter");
5292 43 : return;
5293 : }
5294 :
5295 110 : if (nCountIntersecting == 0)
5296 : {
5297 27 : array->length = 0;
5298 : }
5299 83 : else if (!CompactStructArray(schema, array, 0, abyValidityFromFilters,
5300 : nCountIntersecting))
5301 : {
5302 0 : array->release(array);
5303 0 : memset(array, 0, sizeof(*array));
5304 : }
5305 : }
5306 :
5307 : /************************************************************************/
5308 : /* OGRCloneArrowArray */
5309 : /************************************************************************/
5310 :
5311 13947 : static bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5312 : const struct ArrowArray *src_array,
5313 : struct ArrowArray *out_array,
5314 : size_t nParentOffset)
5315 : {
5316 13947 : memset(out_array, 0, sizeof(*out_array));
5317 13947 : const size_t nLength =
5318 13947 : static_cast<size_t>(src_array->length) - nParentOffset;
5319 13947 : out_array->length = nLength;
5320 13947 : out_array->null_count = src_array->null_count;
5321 13947 : out_array->release = OGRLayerDefaultReleaseArray;
5322 :
5323 13947 : bool bRet = true;
5324 :
5325 13947 : out_array->n_buffers = src_array->n_buffers;
5326 27894 : out_array->buffers = static_cast<const void **>(CPLCalloc(
5327 13947 : static_cast<size_t>(src_array->n_buffers), sizeof(const void *)));
5328 13947 : CPLAssert(static_cast<size_t>(src_array->length) >= nParentOffset);
5329 13947 : const char *format = schema->format;
5330 13947 : const auto nOffset = static_cast<size_t>(src_array->offset) + nParentOffset;
5331 41479 : for (int64_t i = 0; i < src_array->n_buffers; ++i)
5332 : {
5333 27532 : if (i == 0 || IsBoolean(format))
5334 : {
5335 14318 : if (i == 1)
5336 : {
5337 371 : CPLAssert(src_array->buffers[i]);
5338 : }
5339 14318 : if (src_array->buffers[i])
5340 : {
5341 8765 : const size_t nBytes = nLength ? (nLength + 7) / 8 : 1;
5342 : uint8_t *CPL_RESTRICT p = static_cast<uint8_t *>(
5343 8765 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nBytes));
5344 8765 : if (!p)
5345 : {
5346 0 : bRet = false;
5347 0 : break;
5348 : }
5349 8765 : const auto *CPL_RESTRICT pSrcArray =
5350 8765 : static_cast<const uint8_t *>(src_array->buffers[i]);
5351 8765 : if ((nOffset % 8) != 0)
5352 : {
5353 : // Make sure last byte is fully initialized
5354 2281 : p[nBytes - 1] = 0;
5355 7359 : for (size_t iRow = 0; iRow < nLength; ++iRow)
5356 : {
5357 5078 : if (TestBit(pSrcArray, nOffset + iRow))
5358 4949 : SetBit(p, iRow);
5359 : else
5360 129 : UnsetBit(p, iRow);
5361 : }
5362 : }
5363 : else
5364 : {
5365 6484 : memcpy(p, pSrcArray + nOffset / 8, nBytes);
5366 : }
5367 8765 : out_array->buffers[i] = p;
5368 : }
5369 : }
5370 13214 : else if (i == 1)
5371 : {
5372 11083 : CPLAssert(src_array->buffers[i]);
5373 11083 : size_t nEltSize = 0;
5374 11083 : size_t nExtraElt = 0;
5375 11083 : if (IsUInt8(format) || IsInt8(format))
5376 742 : nEltSize = sizeof(uint8_t);
5377 10341 : else if (IsUInt16(format) || IsInt16(format) || IsFloat16(format))
5378 762 : nEltSize = sizeof(uint16_t);
5379 19138 : else if (IsUInt32(format) || IsInt32(format) || IsFloat32(format) ||
5380 27618 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
5381 8480 : strcmp(format, "ttm") == 0)
5382 : {
5383 1316 : nEltSize = sizeof(uint32_t);
5384 : }
5385 12719 : else if (IsString(format) || IsBinary(format) || IsList(format) ||
5386 4456 : IsMap(format))
5387 : {
5388 4496 : nEltSize = sizeof(uint32_t);
5389 4496 : nExtraElt = 1;
5390 : }
5391 7163 : else if (IsUInt64(format) || IsInt64(format) || IsFloat64(format) ||
5392 1648 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
5393 7163 : strcmp(format, "ttn") == 0 || IsTimestamp(format))
5394 : {
5395 2939 : nEltSize = sizeof(uint64_t);
5396 : }
5397 1318 : else if (IsLargeString(format) || IsLargeBinary(format) ||
5398 490 : IsLargeList(format))
5399 : {
5400 343 : nEltSize = sizeof(uint64_t);
5401 343 : nExtraElt = 1;
5402 : }
5403 485 : else if (IsFixedWidthBinary(format))
5404 : {
5405 111 : nEltSize = GetFixedWithBinary(format);
5406 : }
5407 374 : else if (IsDecimal(format))
5408 : {
5409 374 : int nPrecision = 0;
5410 374 : int nScale = 0;
5411 374 : int nWidthInBytes = 0;
5412 374 : if (!ParseDecimalFormat(format, nPrecision, nScale,
5413 : nWidthInBytes))
5414 : {
5415 0 : CPLError(
5416 : CE_Failure, CPLE_AppDefined,
5417 : "Unexpected error in OGRCloneArrowArray(): unhandled "
5418 : "field format: %s",
5419 : format);
5420 :
5421 0 : return false;
5422 : }
5423 374 : nEltSize = nWidthInBytes;
5424 : }
5425 11083 : if (nEltSize)
5426 : {
5427 11083 : void *p = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
5428 : nLength ? nEltSize * (nLength + nExtraElt) : 1);
5429 11083 : if (!p)
5430 : {
5431 0 : bRet = false;
5432 0 : break;
5433 : }
5434 11083 : if (nLength)
5435 : {
5436 12876 : if ((IsString(format) || IsBinary(format)) &&
5437 1793 : static_cast<const uint32_t *>(
5438 1793 : src_array->buffers[1])[nOffset] != 0)
5439 : {
5440 258 : const auto *CPL_RESTRICT pSrcOffsets =
5441 258 : static_cast<const uint32_t *>(
5442 258 : src_array->buffers[1]) +
5443 : nOffset;
5444 258 : const auto nShiftOffset = pSrcOffsets[0];
5445 258 : auto *CPL_RESTRICT pDstOffsets =
5446 : static_cast<uint32_t *>(p);
5447 1118 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5448 : {
5449 860 : pDstOffsets[iRow] =
5450 860 : pSrcOffsets[iRow] - nShiftOffset;
5451 : }
5452 : }
5453 11163 : else if ((IsLargeString(format) || IsLargeBinary(format)) &&
5454 338 : static_cast<const uint64_t *>(
5455 338 : src_array->buffers[1])[nOffset] != 0)
5456 : {
5457 86 : const auto *CPL_RESTRICT pSrcOffsets =
5458 86 : static_cast<const uint64_t *>(
5459 86 : src_array->buffers[1]) +
5460 : nOffset;
5461 86 : const auto nShiftOffset = pSrcOffsets[0];
5462 86 : auto *CPL_RESTRICT pDstOffsets =
5463 : static_cast<uint64_t *>(p);
5464 344 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5465 : {
5466 258 : pDstOffsets[iRow] =
5467 258 : pSrcOffsets[iRow] - nShiftOffset;
5468 : }
5469 : }
5470 : else
5471 : {
5472 10739 : memcpy(
5473 : p,
5474 10739 : static_cast<const GByte *>(src_array->buffers[i]) +
5475 10739 : nEltSize * nOffset,
5476 10739 : nEltSize * (nLength + nExtraElt));
5477 : }
5478 : }
5479 11083 : out_array->buffers[i] = p;
5480 : }
5481 : else
5482 : {
5483 0 : CPLError(CE_Failure, CPLE_AppDefined,
5484 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5485 : "format = '%s', i = 1",
5486 0 : schema->name, format);
5487 0 : bRet = false;
5488 0 : break;
5489 : }
5490 : }
5491 2131 : else if (i == 2)
5492 : {
5493 2131 : CPLAssert(src_array->buffers[i]);
5494 2131 : size_t nSrcCharOffset = 0;
5495 2131 : size_t nCharCount = 0;
5496 2131 : if (IsString(format) || IsBinary(format))
5497 : {
5498 1793 : const auto *pSrcOffsets =
5499 1793 : static_cast<const uint32_t *>(src_array->buffers[1]) +
5500 : nOffset;
5501 1793 : nSrcCharOffset = pSrcOffsets[0];
5502 1793 : nCharCount = pSrcOffsets[nLength] - pSrcOffsets[0];
5503 : }
5504 338 : else if (IsLargeString(format) || IsLargeBinary(format))
5505 : {
5506 338 : const auto *pSrcOffsets =
5507 338 : static_cast<const uint64_t *>(src_array->buffers[1]) +
5508 : nOffset;
5509 338 : nSrcCharOffset = static_cast<size_t>(pSrcOffsets[0]);
5510 338 : nCharCount =
5511 338 : static_cast<size_t>(pSrcOffsets[nLength] - pSrcOffsets[0]);
5512 : }
5513 : else
5514 : {
5515 0 : CPLError(CE_Failure, CPLE_AppDefined,
5516 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5517 : "format = '%s', i = 2",
5518 0 : schema->name, format);
5519 0 : bRet = false;
5520 0 : break;
5521 : }
5522 : void *p =
5523 2131 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCharCount ? nCharCount : 1);
5524 2131 : if (!p)
5525 : {
5526 0 : bRet = false;
5527 0 : break;
5528 : }
5529 2131 : if (nCharCount)
5530 : {
5531 2131 : memcpy(p,
5532 2131 : static_cast<const GByte *>(src_array->buffers[i]) +
5533 : nSrcCharOffset,
5534 : nCharCount);
5535 : }
5536 2131 : out_array->buffers[i] = p;
5537 : }
5538 : else
5539 : {
5540 0 : CPLError(CE_Failure, CPLE_AppDefined,
5541 : "OGRCloneArrowArray(): unhandled case, array = %s, format "
5542 : "= '%s', i = 3",
5543 0 : schema->name, format);
5544 0 : bRet = false;
5545 0 : break;
5546 : }
5547 : }
5548 :
5549 13947 : if (bRet)
5550 : {
5551 13947 : out_array->n_children = src_array->n_children;
5552 13947 : out_array->children = static_cast<struct ArrowArray **>(
5553 13947 : CPLCalloc(static_cast<size_t>(src_array->n_children),
5554 : sizeof(struct ArrowArray *)));
5555 27653 : for (int64_t i = 0; i < src_array->n_children; ++i)
5556 : {
5557 27412 : out_array->children[i] = static_cast<struct ArrowArray *>(
5558 13706 : CPLCalloc(1, sizeof(struct ArrowArray)));
5559 39777 : if (!OGRCloneArrowArray(schema->children[i], src_array->children[i],
5560 13706 : out_array->children[i],
5561 13706 : IsFixedSizeList(format)
5562 1341 : ? nOffset * GetFixedSizeList(format)
5563 12365 : : IsStructure(format) ? nOffset
5564 : : 0))
5565 : {
5566 0 : bRet = false;
5567 0 : break;
5568 : }
5569 : }
5570 : }
5571 :
5572 13947 : if (bRet && src_array->dictionary)
5573 : {
5574 111 : out_array->dictionary = static_cast<struct ArrowArray *>(
5575 111 : CPLCalloc(1, sizeof(struct ArrowArray)));
5576 111 : bRet = OGRCloneArrowArray(schema->dictionary, src_array->dictionary,
5577 : out_array->dictionary, 0);
5578 : }
5579 :
5580 13947 : if (!bRet)
5581 : {
5582 0 : out_array->release(out_array);
5583 0 : memset(out_array, 0, sizeof(*out_array));
5584 : }
5585 13947 : return bRet;
5586 : }
5587 :
5588 : /** Full/deep copy of an array.
5589 : *
5590 : * Renormalize the offset of the array (and its children) to 0.
5591 : *
5592 : * In case of failure, out_array will be let in a released state.
5593 : *
5594 : * @param schema Schema of the array. Must *NOT* be NULL.
5595 : * @param src_array Source array. Must *NOT* be NULL.
5596 : * @param out_array Output array. Must *NOT* be NULL (but its content may be random)
5597 : * @return true if success.
5598 : */
5599 130 : bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5600 : const struct ArrowArray *src_array,
5601 : struct ArrowArray *out_array)
5602 : {
5603 130 : return OGRCloneArrowArray(schema, src_array, out_array, 0);
5604 : }
5605 :
5606 : /************************************************************************/
5607 : /* OGRCloneArrowMetadata() */
5608 : /************************************************************************/
5609 :
5610 23 : static void *OGRCloneArrowMetadata(const void *pMetadata)
5611 : {
5612 23 : if (!pMetadata)
5613 19 : return nullptr;
5614 4 : std::vector<GByte> abyOut;
5615 4 : const GByte *pabyMetadata = static_cast<const GByte *>(pMetadata);
5616 : int32_t nKVP;
5617 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + sizeof(int32_t));
5618 4 : memcpy(&nKVP, pabyMetadata, sizeof(int32_t));
5619 4 : pabyMetadata += sizeof(int32_t);
5620 8 : for (int i = 0; i < nKVP; ++i)
5621 : {
5622 : int32_t nSizeKey;
5623 0 : abyOut.insert(abyOut.end(), pabyMetadata,
5624 4 : pabyMetadata + sizeof(int32_t));
5625 4 : memcpy(&nSizeKey, pabyMetadata, sizeof(int32_t));
5626 4 : pabyMetadata += sizeof(int32_t);
5627 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeKey);
5628 4 : pabyMetadata += nSizeKey;
5629 :
5630 : int32_t nSizeValue;
5631 0 : abyOut.insert(abyOut.end(), pabyMetadata,
5632 4 : pabyMetadata + sizeof(int32_t));
5633 4 : memcpy(&nSizeValue, pabyMetadata, sizeof(int32_t));
5634 4 : pabyMetadata += sizeof(int32_t);
5635 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeValue);
5636 4 : pabyMetadata += nSizeValue;
5637 : }
5638 :
5639 4 : GByte *pabyOut = static_cast<GByte *>(VSI_MALLOC_VERBOSE(abyOut.size()));
5640 4 : if (pabyOut)
5641 4 : memcpy(pabyOut, abyOut.data(), abyOut.size());
5642 4 : return pabyOut;
5643 : }
5644 :
5645 : /************************************************************************/
5646 : /* OGRCloneArrowSchema() */
5647 : /************************************************************************/
5648 :
5649 : /** Full/deep copy of a schema.
5650 : *
5651 : * In case of failure, out_schema will be let in a released state.
5652 : *
5653 : * @param schema Schema to clone. Must *NOT* be NULL.
5654 : * @param out_schema Output schema. Must *NOT* be NULL (but its content may be random)
5655 : * @return true if success.
5656 : */
5657 23 : bool OGRCloneArrowSchema(const struct ArrowSchema *schema,
5658 : struct ArrowSchema *out_schema)
5659 : {
5660 23 : memset(out_schema, 0, sizeof(*out_schema));
5661 23 : out_schema->release = OGRLayerFullReleaseSchema;
5662 23 : out_schema->format = CPLStrdup(schema->format);
5663 23 : out_schema->name = CPLStrdup(schema->name);
5664 23 : out_schema->metadata = static_cast<const char *>(
5665 23 : const_cast<const void *>(OGRCloneArrowMetadata(schema->metadata)));
5666 23 : out_schema->flags = schema->flags;
5667 23 : if (schema->n_children)
5668 : {
5669 5 : out_schema->children =
5670 5 : static_cast<struct ArrowSchema **>(VSI_CALLOC_VERBOSE(
5671 : static_cast<int>(schema->n_children), sizeof(ArrowSchema *)));
5672 5 : if (!out_schema->children)
5673 : {
5674 0 : out_schema->release(out_schema);
5675 0 : return false;
5676 : }
5677 5 : out_schema->n_children = schema->n_children;
5678 23 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
5679 : {
5680 36 : out_schema->children[i] = static_cast<struct ArrowSchema *>(
5681 18 : CPLMalloc(sizeof(ArrowSchema)));
5682 18 : if (!OGRCloneArrowSchema(schema->children[i],
5683 18 : out_schema->children[i]))
5684 : {
5685 0 : out_schema->release(out_schema);
5686 0 : return false;
5687 : }
5688 : }
5689 : }
5690 23 : if (schema->dictionary)
5691 : {
5692 0 : out_schema->dictionary =
5693 0 : static_cast<struct ArrowSchema *>(CPLMalloc(sizeof(ArrowSchema)));
5694 0 : if (!OGRCloneArrowSchema(schema->dictionary, out_schema->dictionary))
5695 : {
5696 0 : out_schema->release(out_schema);
5697 0 : return false;
5698 : }
5699 : }
5700 23 : return true;
5701 : }
5702 :
5703 : /************************************************************************/
5704 : /* OGRLayer::IsArrowSchemaSupported() */
5705 : /************************************************************************/
5706 :
5707 : const struct
5708 : {
5709 : const char *arrowType;
5710 : OGRFieldType eType;
5711 : OGRFieldSubType eSubType;
5712 : } gasArrowTypesToOGR[] = {
5713 : {"b", OFTInteger, OFSTBoolean}, {"c", OFTInteger, OFSTInt16}, // Int8
5714 : {"C", OFTInteger, OFSTInt16}, // UInt8
5715 : {"s", OFTInteger, OFSTInt16}, // Int16
5716 : {"S", OFTInteger, OFSTNone}, // UInt16
5717 : {"i", OFTInteger, OFSTNone}, // Int32
5718 : {"I", OFTInteger64, OFSTNone}, // UInt32
5719 : {"l", OFTInteger64, OFSTNone}, // Int64
5720 : {"L", OFTReal, OFSTNone}, // UInt64 (potentially lossy conversion if going through OGRFeature)
5721 : {"e", OFTReal, OFSTFloat32}, // float16
5722 : {"f", OFTReal, OFSTFloat32}, // float32
5723 : {"g", OFTReal, OFSTNone}, // float64
5724 : {"z", OFTBinary, OFSTNone}, // binary
5725 : {"Z", OFTBinary, OFSTNone}, // large binary (will be limited to 32 bit length though if going through OGRFeature!)
5726 : {"u", OFTString, OFSTNone}, // string
5727 : {"U", OFTString, OFSTNone}, // large string
5728 : {"tdD", OFTDate, OFSTNone}, // date32[days]
5729 : {"tdm", OFTDate, OFSTNone}, // date64[milliseconds]
5730 : {"tts", OFTTime, OFSTNone}, // time32 [seconds]
5731 : {"ttm", OFTTime, OFSTNone}, // time32 [milliseconds]
5732 : {"ttu", OFTTime, OFSTNone}, // time64 [microseconds]
5733 : {"ttn", OFTTime, OFSTNone}, // time64 [nanoseconds]
5734 : };
5735 :
5736 : const struct
5737 : {
5738 : const char arrowLetter;
5739 : OGRFieldType eType;
5740 : OGRFieldSubType eSubType;
5741 : } gasListTypes[] = {
5742 : {ARROW_LETTER_BOOLEAN, OFTIntegerList, OFSTBoolean},
5743 : {ARROW_LETTER_INT8, OFTIntegerList, OFSTInt16},
5744 : {ARROW_LETTER_UINT8, OFTIntegerList, OFSTInt16},
5745 : {ARROW_LETTER_INT16, OFTIntegerList, OFSTInt16},
5746 : {ARROW_LETTER_UINT16, OFTIntegerList, OFSTNone},
5747 : {ARROW_LETTER_INT32, OFTIntegerList, OFSTNone},
5748 : {ARROW_LETTER_UINT32, OFTInteger64List, OFSTNone},
5749 : {ARROW_LETTER_INT64, OFTInteger64List, OFSTNone},
5750 : {ARROW_LETTER_UINT64, OFTRealList,
5751 : OFSTNone}, //(potentially lossy conversion if going through OGRFeature)
5752 : {ARROW_LETTER_FLOAT16, OFTRealList, OFSTFloat32},
5753 : {ARROW_LETTER_FLOAT32, OFTRealList, OFSTFloat32},
5754 : {ARROW_LETTER_FLOAT64, OFTRealList, OFSTNone},
5755 : {ARROW_LETTER_STRING, OFTStringList, OFSTNone},
5756 : {ARROW_LETTER_LARGE_STRING, OFTStringList, OFSTNone},
5757 : };
5758 :
5759 43 : static inline bool IsValidDictionaryIndexType(const char *format)
5760 : {
5761 40 : return (format[0] == ARROW_LETTER_INT8 || format[0] == ARROW_LETTER_UINT8 ||
5762 37 : format[0] == ARROW_LETTER_INT16 ||
5763 34 : format[0] == ARROW_LETTER_UINT16 ||
5764 31 : format[0] == ARROW_LETTER_INT32 ||
5765 9 : format[0] == ARROW_LETTER_UINT32 ||
5766 6 : format[0] == ARROW_LETTER_INT64 ||
5767 89 : format[0] == ARROW_LETTER_UINT64) &&
5768 86 : format[1] == 0;
5769 : }
5770 :
5771 230 : static bool IsSupportForJSONObj(const struct ArrowSchema *schema)
5772 : {
5773 230 : const char *format = schema->format;
5774 230 : if (IsStructure(format))
5775 : {
5776 35 : for (int64_t i = 0; i < schema->n_children; ++i)
5777 : {
5778 26 : if (!IsSupportForJSONObj(schema->children[i]))
5779 0 : return false;
5780 : }
5781 9 : return true;
5782 : }
5783 :
5784 2752 : for (const auto &sType : gasListTypes)
5785 : {
5786 2626 : if (format[0] == sType.arrowLetter && format[1] == 0)
5787 : {
5788 95 : return true;
5789 : }
5790 : }
5791 :
5792 126 : if (IsBinary(format) || IsLargeBinary(format) || IsFixedWidthBinary(format))
5793 12 : return true;
5794 :
5795 114 : if (IsDecimal(format))
5796 : {
5797 6 : int nPrecision = 0;
5798 6 : int nScale = 0;
5799 6 : int nWidthInBytes = 0;
5800 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
5801 : {
5802 0 : CPLError(CE_Failure, CPLE_AppDefined, "Invalid field format %s",
5803 : format);
5804 0 : return false;
5805 : }
5806 :
5807 6 : return GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision) ==
5808 6 : nullptr;
5809 : }
5810 :
5811 108 : if (IsMap(format))
5812 : {
5813 74 : return IsStructure(schema->children[0]->format) &&
5814 148 : schema->children[0]->n_children == 2 &&
5815 222 : IsString(schema->children[0]->children[0]->format) &&
5816 148 : IsSupportForJSONObj(schema->children[0]->children[1]);
5817 : }
5818 :
5819 34 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
5820 : {
5821 34 : return IsSupportForJSONObj(schema->children[0]);
5822 : }
5823 :
5824 0 : return false;
5825 : }
5826 :
5827 518 : static bool IsArrowSchemaSupportedInternal(const struct ArrowSchema *schema,
5828 : const std::string &osFieldPrefix,
5829 : std::string &osErrorMsg)
5830 : {
5831 0 : const auto AppendError = [&osErrorMsg](const std::string &osMsg)
5832 : {
5833 0 : if (!osErrorMsg.empty())
5834 0 : osErrorMsg += " ";
5835 0 : osErrorMsg += osMsg;
5836 518 : };
5837 :
5838 518 : const char *fieldName = schema->name;
5839 518 : const char *format = schema->format;
5840 518 : if (IsStructure(format))
5841 : {
5842 5 : bool bRet = true;
5843 5 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
5844 21 : for (int64_t i = 0; i < schema->n_children; ++i)
5845 : {
5846 16 : if (!IsArrowSchemaSupportedInternal(schema->children[i],
5847 : osNewPrefix, osErrorMsg))
5848 0 : bRet = false;
5849 : }
5850 5 : return bRet;
5851 : }
5852 :
5853 513 : if (schema->dictionary)
5854 : {
5855 15 : if (!IsValidDictionaryIndexType(format))
5856 : {
5857 0 : AppendError("Dictionary only supported if the parent is of "
5858 : "type [U]Int[8|16|32|64]");
5859 0 : return false;
5860 : }
5861 :
5862 15 : schema = schema->dictionary;
5863 15 : format = schema->format;
5864 : }
5865 :
5866 513 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
5867 : {
5868 : // Only some subtypes supported
5869 132 : const char *childFormat = schema->children[0]->format;
5870 1103 : for (const auto &sType : gasListTypes)
5871 : {
5872 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
5873 : {
5874 117 : return true;
5875 : }
5876 : }
5877 :
5878 15 : if (IsDecimal(childFormat))
5879 : {
5880 7 : int nPrecision = 0;
5881 7 : int nScale = 0;
5882 7 : int nWidthInBytes = 0;
5883 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
5884 : nWidthInBytes))
5885 : {
5886 0 : AppendError(std::string("Invalid field format ") + childFormat +
5887 0 : " for field " + osFieldPrefix + fieldName);
5888 0 : return false;
5889 : }
5890 :
5891 : const char *pszError =
5892 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
5893 7 : if (pszError)
5894 : {
5895 0 : AppendError(pszError);
5896 0 : return false;
5897 : }
5898 :
5899 7 : return true;
5900 : }
5901 :
5902 8 : if (IsSupportForJSONObj(schema))
5903 : {
5904 8 : return true;
5905 : }
5906 :
5907 0 : AppendError("Type list for field " + osFieldPrefix + fieldName +
5908 : " is not supported.");
5909 0 : return false;
5910 : }
5911 :
5912 381 : else if (IsMap(format))
5913 : {
5914 70 : if (IsSupportForJSONObj(schema))
5915 70 : return true;
5916 :
5917 0 : AppendError("Type map for field " + osFieldPrefix + fieldName +
5918 : " is not supported.");
5919 0 : return false;
5920 : }
5921 311 : else if (IsDecimal(format))
5922 : {
5923 6 : int nPrecision = 0;
5924 6 : int nScale = 0;
5925 6 : int nWidthInBytes = 0;
5926 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
5927 : {
5928 0 : AppendError(std::string("Invalid field format ") + format +
5929 0 : " for field " + osFieldPrefix + fieldName);
5930 0 : return false;
5931 : }
5932 :
5933 : const char *pszError =
5934 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
5935 6 : if (pszError)
5936 : {
5937 0 : AppendError(pszError);
5938 0 : return false;
5939 : }
5940 :
5941 6 : return true;
5942 : }
5943 : else
5944 : {
5945 3949 : for (const auto &sType : gasArrowTypesToOGR)
5946 : {
5947 3929 : if (strcmp(format, sType.arrowType) == 0)
5948 : {
5949 285 : return true;
5950 : }
5951 : }
5952 :
5953 20 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
5954 20 : return true;
5955 :
5956 0 : AppendError("Type '" + std::string(format) + "' for field " +
5957 0 : osFieldPrefix + fieldName + " is not supported.");
5958 0 : return false;
5959 : }
5960 : }
5961 :
5962 : /** Returns whether the provided ArrowSchema is supported for writing.
5963 : *
5964 : * This method exists since not all drivers may support all Arrow data types.
5965 : *
5966 : * The ArrowSchema must be of type struct (format=+s)
5967 : *
5968 : * It is recommended to call this method before calling WriteArrowBatch().
5969 : *
5970 : * This is the same as the C function OGR_L_IsArrowSchemaSupported().
5971 : *
5972 : * @param schema Schema of type struct (format = '+s')
5973 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
5974 : * @param[out] osErrorMsg Reason of the failure, when this method returns false.
5975 : * @return true if the ArrowSchema is supported for writing.
5976 : * @since 3.8
5977 : */
5978 49 : bool OGRLayer::IsArrowSchemaSupported(const struct ArrowSchema *schema,
5979 : CPL_UNUSED CSLConstList papszOptions,
5980 : std::string &osErrorMsg) const
5981 : {
5982 49 : if (!IsStructure(schema->format))
5983 : {
5984 : osErrorMsg =
5985 : "IsArrowSchemaSupported() should be called on a schema that is a "
5986 1 : "struct of fields";
5987 1 : return false;
5988 : }
5989 :
5990 48 : bool bRet = true;
5991 550 : for (int64_t i = 0; i < schema->n_children; ++i)
5992 : {
5993 502 : if (!IsArrowSchemaSupportedInternal(schema->children[i], std::string(),
5994 : osErrorMsg))
5995 0 : bRet = false;
5996 : }
5997 48 : return bRet;
5998 : }
5999 :
6000 : /************************************************************************/
6001 : /* OGR_L_IsArrowSchemaSupported() */
6002 : /************************************************************************/
6003 :
6004 : /** Returns whether the provided ArrowSchema is supported for writing.
6005 : *
6006 : * This function exists since not all drivers may support all Arrow data types.
6007 : *
6008 : * The ArrowSchema must be of type struct (format=+s)
6009 : *
6010 : * It is recommended to call this function before calling OGR_L_WriteArrowBatch().
6011 : *
6012 : * This is the same as the C++ method OGRLayer::IsArrowSchemaSupported().
6013 : *
6014 : * @param hLayer Layer.
6015 : * @param schema Schema of type struct (format = '+s')
6016 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6017 : * @param[out] ppszErrorMsg nullptr, or pointer to a string that will contain
6018 : * the reason of the failure, when this function returns false.
6019 : * @return true if the ArrowSchema is supported for writing.
6020 : * @since 3.8
6021 : */
6022 19 : bool OGR_L_IsArrowSchemaSupported(OGRLayerH hLayer,
6023 : const struct ArrowSchema *schema,
6024 : char **papszOptions, char **ppszErrorMsg)
6025 : {
6026 19 : VALIDATE_POINTER1(hLayer, __func__, false);
6027 19 : VALIDATE_POINTER1(schema, __func__, false);
6028 :
6029 38 : std::string osErrorMsg;
6030 38 : if (!OGRLayer::FromHandle(hLayer)->IsArrowSchemaSupported(
6031 19 : schema, papszOptions, osErrorMsg))
6032 : {
6033 4 : if (ppszErrorMsg)
6034 4 : *ppszErrorMsg = VSIStrdup(osErrorMsg.c_str());
6035 4 : return false;
6036 : }
6037 : else
6038 : {
6039 15 : if (ppszErrorMsg)
6040 15 : *ppszErrorMsg = nullptr;
6041 15 : return true;
6042 : }
6043 : }
6044 :
6045 : /************************************************************************/
6046 : /* IsKnownCodedFieldDomain() */
6047 : /************************************************************************/
6048 :
6049 34 : static bool IsKnownCodedFieldDomain(OGRLayer *poLayer,
6050 : const char *arrowMetadata)
6051 : {
6052 34 : if (arrowMetadata)
6053 : {
6054 6 : const auto oMetadata = OGRParseArrowMetadata(arrowMetadata);
6055 6 : for (const auto &oIter : oMetadata)
6056 : {
6057 6 : if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6058 : {
6059 6 : auto poDS = poLayer->GetDataset();
6060 6 : if (poDS)
6061 : {
6062 : const auto poFieldDomain =
6063 6 : poDS->GetFieldDomain(oIter.second);
6064 12 : if (poFieldDomain &&
6065 6 : poFieldDomain->GetDomainType() == OFDT_CODED)
6066 : {
6067 6 : return true;
6068 : }
6069 : }
6070 : }
6071 : }
6072 : }
6073 28 : return false;
6074 : }
6075 :
6076 : /************************************************************************/
6077 : /* OGRLayer::CreateFieldFromArrowSchema() */
6078 : /************************************************************************/
6079 :
6080 : //! @cond Doxygen_Suppress
6081 463 : bool OGRLayer::CreateFieldFromArrowSchemaInternal(
6082 : const struct ArrowSchema *schema, const std::string &osFieldPrefix,
6083 : CSLConstList papszOptions)
6084 : {
6085 463 : const char *fieldName = schema->name;
6086 463 : const char *format = schema->format;
6087 463 : if (IsStructure(format))
6088 : {
6089 10 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6090 21 : for (int64_t i = 0; i < schema->n_children; ++i)
6091 : {
6092 16 : if (!CreateFieldFromArrowSchemaInternal(schema->children[i],
6093 : osNewPrefix, papszOptions))
6094 0 : return false;
6095 : }
6096 5 : return true;
6097 : }
6098 :
6099 916 : CPLStringList aosNativeTypes;
6100 458 : auto poLayer = const_cast<OGRLayer *>(this);
6101 458 : auto poDS = poLayer->GetDataset();
6102 458 : if (poDS)
6103 : {
6104 458 : auto poDriver = poDS->GetDriver();
6105 458 : if (poDriver)
6106 : {
6107 : const char *pszMetadataItem =
6108 458 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
6109 458 : if (pszMetadataItem)
6110 458 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
6111 : }
6112 : }
6113 :
6114 473 : if (schema->dictionary &&
6115 15 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6116 : {
6117 13 : if (!IsValidDictionaryIndexType(format))
6118 : {
6119 0 : CPLError(CE_Failure, CPLE_NotSupported,
6120 : "Dictionary only supported if the parent is of "
6121 : "type [U]Int[8|16|32|64]");
6122 0 : return false;
6123 : }
6124 :
6125 13 : schema = schema->dictionary;
6126 13 : format = schema->format;
6127 : }
6128 :
6129 458 : const auto AddField = [this, schema, fieldName, &aosNativeTypes,
6130 : &osFieldPrefix, poDS](OGRFieldType eTypeIn,
6131 : OGRFieldSubType eSubTypeIn,
6132 3239 : int nWidth, int nPrecision)
6133 : {
6134 458 : const char *pszTypeName = OGRFieldDefn::GetFieldTypeName(eTypeIn);
6135 458 : auto eTypeOut = eTypeIn;
6136 458 : auto eSubTypeOut = eSubTypeIn;
6137 916 : if (!aosNativeTypes.empty() &&
6138 458 : aosNativeTypes.FindString(pszTypeName) < 0)
6139 : {
6140 20 : eTypeOut = OFTString;
6141 20 : eSubTypeOut =
6142 15 : (eTypeIn == OFTIntegerList || eTypeIn == OFTInteger64List ||
6143 8 : eTypeIn == OFTRealList || eTypeIn == OFTStringList)
6144 35 : ? OFSTJSON
6145 : : OFSTNone;
6146 : }
6147 :
6148 916 : const std::string osWantedOGRFieldName = osFieldPrefix + fieldName;
6149 916 : OGRFieldDefn oFieldDefn(osWantedOGRFieldName.c_str(), eTypeOut);
6150 458 : oFieldDefn.SetSubType(eSubTypeOut);
6151 458 : if (eTypeOut == eTypeIn && eSubTypeOut == eSubTypeIn)
6152 : {
6153 438 : oFieldDefn.SetWidth(nWidth);
6154 438 : oFieldDefn.SetPrecision(nPrecision);
6155 : }
6156 458 : oFieldDefn.SetNullable((schema->flags & ARROW_FLAG_NULLABLE) != 0);
6157 :
6158 458 : if (schema->metadata)
6159 : {
6160 56 : const auto oMetadata = OGRParseArrowMetadata(schema->metadata);
6161 57 : for (const auto &oIter : oMetadata)
6162 : {
6163 29 : if (oIter.first == MD_GDAL_OGR_TYPE)
6164 : {
6165 3 : const auto &osType = oIter.second;
6166 36 : for (auto eType = OFTInteger; eType <= OFTMaxType;)
6167 : {
6168 36 : if (OGRFieldDefn::GetFieldTypeName(eType) == osType)
6169 : {
6170 3 : oFieldDefn.SetType(eType);
6171 3 : break;
6172 : }
6173 33 : if (eType == OFTMaxType)
6174 0 : break;
6175 : else
6176 33 : eType = static_cast<OGRFieldType>(eType + 1);
6177 : }
6178 : }
6179 26 : else if (oIter.first == MD_GDAL_OGR_ALTERNATIVE_NAME)
6180 2 : oFieldDefn.SetAlternativeName(oIter.second.c_str());
6181 24 : else if (oIter.first == MD_GDAL_OGR_COMMENT)
6182 2 : oFieldDefn.SetComment(oIter.second);
6183 22 : else if (oIter.first == MD_GDAL_OGR_DEFAULT)
6184 2 : oFieldDefn.SetDefault(oIter.second.c_str());
6185 20 : else if (oIter.first == MD_GDAL_OGR_SUBTYPE)
6186 : {
6187 5 : if (eTypeIn == eTypeOut)
6188 : {
6189 4 : const auto &osSubType = oIter.second;
6190 4 : for (auto eSubType = OFSTNone;
6191 15 : eSubType <= OFSTMaxSubType;)
6192 : {
6193 15 : if (OGRFieldDefn::GetFieldSubTypeName(eSubType) ==
6194 : osSubType)
6195 : {
6196 4 : oFieldDefn.SetSubType(eSubType);
6197 4 : break;
6198 : }
6199 11 : if (eSubType == OFSTMaxSubType)
6200 0 : break;
6201 : else
6202 11 : eSubType =
6203 11 : static_cast<OGRFieldSubType>(eSubType + 1);
6204 : }
6205 : }
6206 : }
6207 15 : else if (oIter.first == MD_GDAL_OGR_WIDTH)
6208 4 : oFieldDefn.SetWidth(atoi(oIter.second.c_str()));
6209 11 : else if (oIter.first == MD_GDAL_OGR_UNIQUE)
6210 2 : oFieldDefn.SetUnique(oIter.second == "true");
6211 9 : else if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6212 : {
6213 2 : if (poDS && poDS->GetFieldDomain(oIter.second))
6214 2 : oFieldDefn.SetDomainName(oIter.second);
6215 : }
6216 13 : else if (oIter.first == ARROW_EXTENSION_NAME_KEY &&
6217 6 : oIter.second == EXTENSION_NAME_ARROW_JSON)
6218 : {
6219 6 : oFieldDefn.SetSubType(OFSTJSON);
6220 : }
6221 : else
6222 : {
6223 1 : CPLDebug("OGR", "Unknown field metadata: %s",
6224 : oIter.first.c_str());
6225 : }
6226 : }
6227 : }
6228 458 : auto poLayerDefn = GetLayerDefn();
6229 458 : const int nFieldCountBefore = poLayerDefn->GetFieldCount();
6230 916 : if (CreateField(&oFieldDefn) != OGRERR_NONE ||
6231 458 : nFieldCountBefore + 1 != poLayerDefn->GetFieldCount())
6232 : {
6233 0 : return false;
6234 : }
6235 : const char *pszActualFieldName =
6236 458 : poLayerDefn->GetFieldDefn(nFieldCountBefore)->GetNameRef();
6237 458 : if (pszActualFieldName != osWantedOGRFieldName)
6238 : {
6239 : m_poPrivate
6240 1 : ->m_oMapArrowFieldNameToOGRFieldName[osWantedOGRFieldName] =
6241 1 : pszActualFieldName;
6242 : }
6243 458 : return true;
6244 458 : };
6245 :
6246 8084 : for (const auto &sType : gasArrowTypesToOGR)
6247 : {
6248 7853 : if (strcmp(format, sType.arrowType) == 0)
6249 : {
6250 227 : return AddField(sType.eType, sType.eSubType, 0, 0);
6251 : }
6252 : }
6253 :
6254 231 : if (IsMap(format))
6255 : {
6256 70 : return AddField(OFTString, OFSTJSON, 0, 0);
6257 : }
6258 :
6259 161 : if (IsTimestamp(format))
6260 : {
6261 20 : return AddField(OFTDateTime, OFSTNone, 0, 0);
6262 : }
6263 :
6264 141 : if (IsFixedWidthBinary(format))
6265 : {
6266 3 : return AddField(OFTBinary, OFSTNone, GetFixedWithBinary(format), 0);
6267 : }
6268 :
6269 138 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6270 : {
6271 132 : const char *childFormat = schema->children[0]->format;
6272 1103 : for (const auto &sType : gasListTypes)
6273 : {
6274 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
6275 : {
6276 117 : return AddField(sType.eType, sType.eSubType, 0, 0);
6277 : }
6278 : }
6279 :
6280 15 : if (IsDecimal(childFormat))
6281 : {
6282 7 : int nPrecision = 0;
6283 7 : int nScale = 0;
6284 7 : int nWidthInBytes = 0;
6285 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
6286 : nWidthInBytes))
6287 : {
6288 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6289 0 : (std::string("Invalid field format ") + format +
6290 0 : " for field " + osFieldPrefix + fieldName)
6291 : .c_str());
6292 0 : return false;
6293 : }
6294 :
6295 : const char *pszError =
6296 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6297 7 : if (pszError)
6298 : {
6299 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6300 0 : return false;
6301 : }
6302 :
6303 : // DBF convention: add space for negative sign and decimal separator
6304 7 : return AddField(OFTRealList, OFSTNone, nPrecision + 2, nScale);
6305 : }
6306 :
6307 8 : if (IsSupportForJSONObj(schema->children[0]))
6308 : {
6309 8 : return AddField(OFTString, OFSTJSON, 0, 0);
6310 : }
6311 :
6312 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6313 0 : ("List of type '" + std::string(childFormat) + "' for field " +
6314 0 : osFieldPrefix + fieldName + " is not supported.")
6315 : .c_str());
6316 0 : return false;
6317 : }
6318 :
6319 6 : if (IsDecimal(format))
6320 : {
6321 6 : int nPrecision = 0;
6322 6 : int nScale = 0;
6323 6 : int nWidthInBytes = 0;
6324 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6325 : {
6326 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6327 0 : (std::string("Invalid field format ") + format +
6328 0 : " for field " + osFieldPrefix + fieldName)
6329 : .c_str());
6330 0 : return false;
6331 : }
6332 :
6333 : const char *pszError =
6334 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6335 6 : if (pszError)
6336 : {
6337 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6338 0 : return false;
6339 : }
6340 :
6341 : // DBF convention: add space for negative sign and decimal separator
6342 6 : return AddField(OFTReal, OFSTNone, nPrecision + 2, nScale);
6343 : }
6344 :
6345 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6346 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
6347 0 : fieldName + " is not supported.")
6348 : .c_str());
6349 0 : return false;
6350 : }
6351 :
6352 : //! @endcond
6353 :
6354 : /** Creates a field from an ArrowSchema.
6355 : *
6356 : * This should only be used for attribute fields. Geometry fields should
6357 : * be created with CreateGeomField(). The FID field should also not be
6358 : * passed with this method.
6359 : *
6360 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6361 : * passed schema must be for an individual field, and thus, is *not* of type
6362 : * struct (format=+s) (unless writing a set of fields grouped together in the
6363 : * same structure).
6364 : *
6365 : * Additional field metadata can be specified through the ArrowSchema::metadata
6366 : * field with the potential following items:
6367 : * <ul>
6368 : * <li>"GDAL:OGR:alternative_name": value of
6369 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6370 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6371 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6372 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6373 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6374 : * string)</li>
6375 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6376 : * "true" or "false")</li>
6377 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6378 : * </ul>
6379 : *
6380 : * This method and CreateField() are mutually exclusive in the same session.
6381 : *
6382 : * This method is the same as the C function OGR_L_CreateFieldFromArrowSchema().
6383 : *
6384 : * @param schema Schema of the field to create.
6385 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6386 : * @return true in case of success
6387 : * @since 3.8
6388 : */
6389 447 : bool OGRLayer::CreateFieldFromArrowSchema(const struct ArrowSchema *schema,
6390 : CSLConstList papszOptions)
6391 : {
6392 894 : return CreateFieldFromArrowSchemaInternal(schema, std::string(),
6393 894 : papszOptions);
6394 : }
6395 :
6396 : /************************************************************************/
6397 : /* OGR_L_CreateFieldFromArrowSchema() */
6398 : /************************************************************************/
6399 :
6400 : /** Creates a field from an ArrowSchema.
6401 : *
6402 : * This should only be used for attribute fields. Geometry fields should
6403 : * be created with CreateGeomField(). The FID field should also not be
6404 : * passed with this method.
6405 : *
6406 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6407 : * passed schema must be for an individual field, and thus, is *not* of type
6408 : * struct (format=+s) (unless writing a set of fields grouped together in the
6409 : * same structure).
6410 : *
6411 : * Additional field metadata can be specified through the ArrowSchema::metadata
6412 : * field with the potential following items:
6413 : * <ul>
6414 : * <li>"GDAL:OGR:alternative_name": value of
6415 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6416 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6417 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6418 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6419 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6420 : * string)</li>
6421 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6422 : * "true" or "false")</li>
6423 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6424 : * </ul>
6425 : *
6426 : * This method and CreateField() are mutually exclusive in the same session.
6427 : *
6428 : * This method is the same as the C++ method OGRLayer::CreateFieldFromArrowSchema().
6429 : *
6430 : * @param hLayer Layer.
6431 : * @param schema Schema of the field to create.
6432 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6433 : * @return true in case of success
6434 : * @since 3.8
6435 : */
6436 541 : bool OGR_L_CreateFieldFromArrowSchema(OGRLayerH hLayer,
6437 : const struct ArrowSchema *schema,
6438 : char **papszOptions)
6439 : {
6440 541 : VALIDATE_POINTER1(hLayer, __func__, false);
6441 541 : VALIDATE_POINTER1(schema, __func__, false);
6442 :
6443 1082 : return OGRLayer::FromHandle(hLayer)->CreateFieldFromArrowSchema(
6444 541 : schema, papszOptions);
6445 : }
6446 :
6447 : /************************************************************************/
6448 : /* BuildOGRFieldInfo() */
6449 : /************************************************************************/
6450 :
6451 : constexpr int FID_COLUMN_SPECIAL_OGR_FIELD_IDX = -2;
6452 :
6453 : struct FieldInfo
6454 : {
6455 : std::string osName{};
6456 : int iOGRFieldIdx = -1;
6457 : const char *format = nullptr;
6458 : OGRFieldType eNominalFieldType =
6459 : OFTMaxType; // OGR data type that would best match the Arrow type
6460 : OGRFieldType eTargetFieldType =
6461 : OFTMaxType; // actual OGR data type of the layer field
6462 : // OGR data type of the feature passed to FillFeature()
6463 : OGRFieldType eSetFeatureFieldType = OFTMaxType;
6464 : bool bIsGeomCol = false;
6465 : bool bUseDictionary = false;
6466 : bool bUseStringOptim = false;
6467 : int nWidthInBytes = 0; // only used for decimal fields
6468 : int nPrecision = 0; // only used for decimal fields
6469 : int nScale = 0; // only used for decimal fields
6470 : };
6471 :
6472 755 : static bool BuildOGRFieldInfo(
6473 : const struct ArrowSchema *schema, struct ArrowArray *array,
6474 : const OGRFeatureDefn *poFeatureDefn, const std::string &osFieldPrefix,
6475 : const CPLStringList &aosNativeTypes, bool &bFallbackTypesUsed,
6476 : std::vector<FieldInfo> &asFieldInfo, const char *pszFIDName,
6477 : const char *pszGeomFieldName, OGRLayer *poLayer,
6478 : const std::map<std::string, std::string> &oMapArrowFieldNameToOGRFieldName,
6479 : const struct ArrowSchema *&schemaFIDColumn,
6480 : struct ArrowArray *&arrayFIDColumn)
6481 : {
6482 755 : const char *fieldName = schema->name;
6483 755 : const char *format = schema->format;
6484 755 : if (IsStructure(format))
6485 : {
6486 18 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6487 39 : for (int64_t i = 0; i < array->n_children; ++i)
6488 : {
6489 30 : if (!BuildOGRFieldInfo(schema->children[i], array->children[i],
6490 : poFeatureDefn, osNewPrefix, aosNativeTypes,
6491 : bFallbackTypesUsed, asFieldInfo, pszFIDName,
6492 : pszGeomFieldName, poLayer,
6493 : oMapArrowFieldNameToOGRFieldName,
6494 : schemaFIDColumn, arrayFIDColumn))
6495 : {
6496 0 : return false;
6497 : }
6498 : }
6499 9 : return true;
6500 : }
6501 :
6502 1492 : FieldInfo sInfo;
6503 :
6504 765 : if (schema->dictionary &&
6505 19 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6506 : {
6507 15 : if (!IsValidDictionaryIndexType(format))
6508 : {
6509 0 : CPLError(CE_Failure, CPLE_NotSupported,
6510 : "Dictionary only supported if the parent is of "
6511 : "type [U]Int[8|16|32|64]");
6512 0 : return false;
6513 : }
6514 :
6515 15 : sInfo.bUseDictionary = true;
6516 15 : schema = schema->dictionary;
6517 15 : format = schema->format;
6518 15 : array = array->dictionary;
6519 : }
6520 :
6521 746 : sInfo.osName = osFieldPrefix + fieldName;
6522 746 : sInfo.format = format;
6523 746 : if (pszFIDName && sInfo.osName == pszFIDName)
6524 : {
6525 29 : if (IsInt32(format) || IsInt64(format))
6526 : {
6527 28 : sInfo.iOGRFieldIdx = FID_COLUMN_SPECIAL_OGR_FIELD_IDX;
6528 28 : schemaFIDColumn = schema;
6529 28 : arrayFIDColumn = array;
6530 : }
6531 : else
6532 : {
6533 1 : CPLError(CE_Failure, CPLE_AppDefined,
6534 : "FID column '%s' should be of Arrow format 'i' "
6535 : "(int32) or 'l' (int64)",
6536 : sInfo.osName.c_str());
6537 1 : return false;
6538 : }
6539 : }
6540 : else
6541 : {
6542 : const std::string &osExpectedOGRFieldName =
6543 2150 : [&oMapArrowFieldNameToOGRFieldName, &sInfo]() -> const std::string &
6544 : {
6545 : const auto oIter =
6546 717 : oMapArrowFieldNameToOGRFieldName.find(sInfo.osName);
6547 717 : if (oIter != oMapArrowFieldNameToOGRFieldName.end())
6548 1 : return oIter->second;
6549 716 : return sInfo.osName;
6550 717 : }();
6551 717 : sInfo.iOGRFieldIdx =
6552 717 : poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str());
6553 717 : if (sInfo.iOGRFieldIdx >= 0)
6554 : {
6555 646 : bool bTypeOK = false;
6556 : const auto eOGRType =
6557 646 : poFeatureDefn->GetFieldDefn(sInfo.iOGRFieldIdx)->GetType();
6558 646 : sInfo.eTargetFieldType = eOGRType;
6559 11606 : for (const auto &sType : gasArrowTypesToOGR)
6560 : {
6561 11263 : if (strcmp(format, sType.arrowType) == 0)
6562 : {
6563 303 : sInfo.bUseStringOptim = sType.eType == OFTString;
6564 303 : sInfo.eNominalFieldType = sType.eType;
6565 303 : if (eOGRType == sInfo.eNominalFieldType)
6566 : {
6567 273 : bTypeOK = true;
6568 273 : break;
6569 : }
6570 30 : else if (eOGRType == OFTString)
6571 : {
6572 4 : bFallbackTypesUsed = true;
6573 4 : bTypeOK = true;
6574 4 : break;
6575 : }
6576 26 : else if (eOGRType == OFTInteger &&
6577 10 : sType.eType == OFTInteger64)
6578 : {
6579 : // Potentially lossy.
6580 4 : CPLDebug("OGR",
6581 : "For field %s, writing from Arrow array of "
6582 : "type Int64 into OGR Int32 field. "
6583 : "Potentially loss conversion can happen",
6584 : sInfo.osName.c_str());
6585 4 : bFallbackTypesUsed = true;
6586 4 : bTypeOK = true;
6587 4 : break;
6588 : }
6589 22 : else if (eOGRType == OFTInteger && sType.eType == OFTReal)
6590 : {
6591 : // Potentially lossy.
6592 6 : CPLDebug("OGR",
6593 : "For field %s, writing from Arrow array of "
6594 : "type Real into OGR Int32 field. "
6595 : "Potentially loss conversion can happen",
6596 : sInfo.osName.c_str());
6597 6 : bFallbackTypesUsed = true;
6598 6 : bTypeOK = true;
6599 6 : break;
6600 : }
6601 16 : else if (eOGRType == OFTInteger64 && sType.eType == OFTReal)
6602 : {
6603 : // Potentially lossy.
6604 6 : CPLDebug("OGR",
6605 : "For field %s, writing from Arrow array of "
6606 : "type Real into OGR Int64 field. "
6607 : "Potentially loss conversion can happen",
6608 : sInfo.osName.c_str());
6609 6 : bFallbackTypesUsed = true;
6610 6 : bTypeOK = true;
6611 6 : break;
6612 : }
6613 10 : else if (eOGRType == OFTReal && sType.eType == OFTInteger64)
6614 : {
6615 : // Potentially lossy.
6616 4 : CPLDebug("OGR",
6617 : "For field %s, writing from Arrow array of "
6618 : "type Int64 into OGR Real field. "
6619 : "Potentially loss conversion can happen",
6620 : sInfo.osName.c_str());
6621 4 : bFallbackTypesUsed = true;
6622 4 : bTypeOK = true;
6623 4 : break;
6624 : }
6625 6 : else if ((eOGRType == OFTInteger64 ||
6626 4 : eOGRType == OFTReal) &&
6627 4 : sType.eType == OFTInteger)
6628 : {
6629 : // Non-lossy
6630 4 : bFallbackTypesUsed = true;
6631 4 : bTypeOK = true;
6632 4 : break;
6633 : }
6634 2 : else if (eOGRType == OFTDateTime &&
6635 2 : sType.eType == OFTString)
6636 : {
6637 2 : bFallbackTypesUsed = true;
6638 2 : bTypeOK = true;
6639 2 : break;
6640 : }
6641 : else
6642 : {
6643 0 : CPLError(CE_Failure, CPLE_AppDefined,
6644 : "For field %s, OGR field type is %s whereas "
6645 : "Arrow type implies %s",
6646 : sInfo.osName.c_str(),
6647 : OGR_GetFieldTypeName(eOGRType),
6648 0 : OGR_GetFieldTypeName(sType.eType));
6649 0 : return false;
6650 : }
6651 : }
6652 : }
6653 :
6654 646 : if (!bTypeOK && IsMap(format))
6655 : {
6656 106 : sInfo.eNominalFieldType = OFTString;
6657 106 : if (eOGRType == sInfo.eNominalFieldType)
6658 : {
6659 106 : bTypeOK = true;
6660 : }
6661 : else
6662 : {
6663 0 : CPLError(CE_Failure, CPLE_AppDefined,
6664 : "For field %s, OGR field type is %s whereas "
6665 : "Arrow type implies %s",
6666 : sInfo.osName.c_str(),
6667 : OGR_GetFieldTypeName(eOGRType),
6668 : OGR_GetFieldTypeName(OFTString));
6669 0 : return false;
6670 : }
6671 : }
6672 :
6673 646 : if (!bTypeOK && IsTimestamp(format))
6674 : {
6675 32 : sInfo.eNominalFieldType = OFTDateTime;
6676 32 : if (eOGRType == sInfo.eNominalFieldType)
6677 : {
6678 31 : bTypeOK = true;
6679 : }
6680 1 : else if (eOGRType == OFTString)
6681 : {
6682 1 : bFallbackTypesUsed = true;
6683 1 : bTypeOK = true;
6684 : }
6685 : else
6686 : {
6687 0 : CPLError(CE_Failure, CPLE_AppDefined,
6688 : "For field %s, OGR field type is %s whereas "
6689 : "Arrow type implies %s",
6690 : sInfo.osName.c_str(),
6691 : OGR_GetFieldTypeName(eOGRType),
6692 : OGR_GetFieldTypeName(OFTDateTime));
6693 0 : return false;
6694 : }
6695 : }
6696 :
6697 646 : if (!bTypeOK && IsFixedWidthBinary(format))
6698 : {
6699 5 : sInfo.eNominalFieldType = OFTBinary;
6700 5 : if (eOGRType == sInfo.eNominalFieldType)
6701 : {
6702 5 : bTypeOK = true;
6703 : }
6704 0 : else if (eOGRType == OFTString)
6705 : {
6706 0 : bFallbackTypesUsed = true;
6707 0 : bTypeOK = true;
6708 : }
6709 : else
6710 : {
6711 0 : CPLError(CE_Failure, CPLE_AppDefined,
6712 : "For field %s, OGR field type is %s whereas "
6713 : "Arrow type implies %s",
6714 : sInfo.osName.c_str(),
6715 : OGR_GetFieldTypeName(eOGRType),
6716 : OGR_GetFieldTypeName(OFTBinary));
6717 0 : return false;
6718 : }
6719 : }
6720 :
6721 719 : if (!bTypeOK && (IsList(format) || IsLargeList(format) ||
6722 73 : IsFixedSizeList(format)))
6723 : {
6724 190 : const char *childFormat = schema->children[0]->format;
6725 1565 : for (const auto &sType : gasListTypes)
6726 : {
6727 1544 : if (childFormat[0] == sType.arrowLetter &&
6728 169 : childFormat[1] == 0)
6729 : {
6730 169 : sInfo.eNominalFieldType = sType.eType;
6731 169 : if (eOGRType == sInfo.eNominalFieldType)
6732 : {
6733 154 : bTypeOK = true;
6734 154 : break;
6735 : }
6736 15 : else if (eOGRType == OFTString)
6737 : {
6738 15 : bFallbackTypesUsed = true;
6739 15 : bTypeOK = true;
6740 15 : break;
6741 : }
6742 : else
6743 : {
6744 0 : CPLError(CE_Failure, CPLE_AppDefined,
6745 : "For field %s, OGR field type is %s "
6746 : "whereas "
6747 : "Arrow type implies %s",
6748 : sInfo.osName.c_str(),
6749 : OGR_GetFieldTypeName(eOGRType),
6750 0 : OGR_GetFieldTypeName(sType.eType));
6751 0 : return false;
6752 : }
6753 : }
6754 : }
6755 :
6756 190 : if (!bTypeOK && IsDecimal(childFormat))
6757 : {
6758 11 : if (!ParseDecimalFormat(childFormat, sInfo.nPrecision,
6759 : sInfo.nScale, sInfo.nWidthInBytes))
6760 : {
6761 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6762 0 : (std::string("Invalid field format ") +
6763 0 : childFormat + " for field " + osFieldPrefix +
6764 : fieldName)
6765 : .c_str());
6766 0 : return false;
6767 : }
6768 :
6769 11 : const char *pszError = GetErrorIfUnsupportedDecimal(
6770 : sInfo.nWidthInBytes, sInfo.nPrecision);
6771 11 : if (pszError)
6772 : {
6773 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6774 0 : return false;
6775 : }
6776 :
6777 11 : sInfo.eNominalFieldType = OFTRealList;
6778 11 : if (eOGRType == sInfo.eNominalFieldType)
6779 : {
6780 11 : bTypeOK = true;
6781 : }
6782 0 : else if (eOGRType == OFTString)
6783 : {
6784 0 : bFallbackTypesUsed = true;
6785 0 : bTypeOK = true;
6786 : }
6787 : else
6788 : {
6789 0 : CPLError(CE_Failure, CPLE_AppDefined,
6790 : "For field %s, OGR field type is %s whereas "
6791 : "Arrow type implies %s",
6792 : sInfo.osName.c_str(),
6793 : OGR_GetFieldTypeName(eOGRType),
6794 : OGR_GetFieldTypeName(OFTRealList));
6795 0 : return false;
6796 : }
6797 : }
6798 :
6799 190 : if (!bTypeOK && IsSupportForJSONObj(schema->children[0]))
6800 : {
6801 10 : sInfo.eNominalFieldType = OFTString;
6802 10 : if (eOGRType == sInfo.eNominalFieldType)
6803 : {
6804 10 : bTypeOK = true;
6805 : }
6806 : else
6807 : {
6808 0 : CPLError(CE_Failure, CPLE_AppDefined,
6809 : "For field %s, OGR field type is %s whereas "
6810 : "Arrow type implies %s",
6811 : sInfo.osName.c_str(),
6812 : OGR_GetFieldTypeName(eOGRType),
6813 : OGR_GetFieldTypeName(OFTString));
6814 0 : return false;
6815 : }
6816 : }
6817 :
6818 190 : if (!bTypeOK)
6819 : {
6820 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6821 0 : ("List of type '" + std::string(childFormat) +
6822 0 : "' for field " + osFieldPrefix + fieldName +
6823 : " is not supported.")
6824 : .c_str());
6825 0 : return false;
6826 : }
6827 : }
6828 :
6829 646 : if (!bTypeOK && IsDecimal(format))
6830 : {
6831 10 : if (!ParseDecimalFormat(format, sInfo.nPrecision, sInfo.nScale,
6832 : sInfo.nWidthInBytes))
6833 : {
6834 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6835 0 : (std::string("Invalid field format ") + format +
6836 0 : " for field " + osFieldPrefix + fieldName)
6837 : .c_str());
6838 0 : return false;
6839 : }
6840 :
6841 10 : const char *pszError = GetErrorIfUnsupportedDecimal(
6842 : sInfo.nWidthInBytes, sInfo.nPrecision);
6843 10 : if (pszError)
6844 : {
6845 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6846 0 : return false;
6847 : }
6848 :
6849 10 : sInfo.eNominalFieldType = OFTReal;
6850 10 : if (eOGRType == sInfo.eNominalFieldType)
6851 : {
6852 10 : bTypeOK = true;
6853 : }
6854 0 : else if (eOGRType == OFTString)
6855 : {
6856 0 : bFallbackTypesUsed = true;
6857 0 : bTypeOK = true;
6858 : }
6859 : else
6860 : {
6861 0 : CPLError(CE_Failure, CPLE_AppDefined,
6862 : "For field %s, OGR field type is %s whereas "
6863 : "Arrow type implies %s",
6864 : sInfo.osName.c_str(),
6865 : OGR_GetFieldTypeName(eOGRType),
6866 : OGR_GetFieldTypeName(OFTReal));
6867 0 : return false;
6868 : }
6869 : }
6870 :
6871 646 : if (!bTypeOK)
6872 : {
6873 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6874 0 : ("Type '" + std::string(format) + "' for field " +
6875 0 : osFieldPrefix + fieldName + " is not supported.")
6876 : .c_str());
6877 0 : return false;
6878 : }
6879 : }
6880 : else
6881 : {
6882 71 : sInfo.iOGRFieldIdx = poFeatureDefn->GetGeomFieldIndex(
6883 71 : osExpectedOGRFieldName.c_str());
6884 71 : if (sInfo.iOGRFieldIdx < 0)
6885 : {
6886 51 : if (pszGeomFieldName && pszGeomFieldName == sInfo.osName)
6887 : {
6888 46 : if (poFeatureDefn->GetGeomFieldCount() == 0)
6889 : {
6890 0 : CPLError(CE_Failure, CPLE_AppDefined,
6891 : "Cannot find OGR geometry field for Arrow "
6892 : "array %s",
6893 : sInfo.osName.c_str());
6894 0 : return false;
6895 : }
6896 46 : sInfo.iOGRFieldIdx = 0;
6897 : }
6898 : else
6899 : {
6900 : // Check if ARROW:extension:name = ogc.wkb or geoarrow.wkb
6901 5 : const char *pabyMetadata = schema->metadata;
6902 5 : if (pabyMetadata)
6903 : {
6904 : const auto oMetadata =
6905 5 : OGRParseArrowMetadata(pabyMetadata);
6906 5 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
6907 10 : if (oIter != oMetadata.end() &&
6908 5 : (oIter->second == EXTENSION_NAME_OGC_WKB ||
6909 0 : oIter->second == EXTENSION_NAME_GEOARROW_WKB))
6910 : {
6911 5 : if (poFeatureDefn->GetGeomFieldCount() == 0)
6912 : {
6913 0 : CPLError(CE_Failure, CPLE_AppDefined,
6914 : "Cannot find OGR geometry field "
6915 : "for Arrow array %s",
6916 : sInfo.osName.c_str());
6917 0 : return false;
6918 : }
6919 5 : sInfo.iOGRFieldIdx = 0;
6920 : }
6921 : }
6922 : }
6923 :
6924 51 : if (sInfo.iOGRFieldIdx < 0)
6925 : {
6926 0 : CPLError(CE_Failure, CPLE_AppDefined,
6927 : "Cannot find OGR field for Arrow array %s",
6928 : sInfo.osName.c_str());
6929 0 : return false;
6930 : }
6931 : }
6932 :
6933 71 : if (!IsBinary(format) && !IsLargeBinary(format))
6934 : {
6935 0 : CPLError(CE_Failure, CPLE_AppDefined,
6936 : "Geometry column '%s' should be of Arrow format "
6937 : "'z' (binary) or 'Z' (large binary)",
6938 : sInfo.osName.c_str());
6939 0 : return false;
6940 : }
6941 71 : sInfo.bIsGeomCol = true;
6942 : }
6943 : }
6944 :
6945 745 : asFieldInfo.emplace_back(std::move(sInfo));
6946 745 : return true;
6947 : }
6948 :
6949 : /************************************************************************/
6950 : /* GetUInt64Value() */
6951 : /************************************************************************/
6952 :
6953 90 : static inline uint64_t GetUInt64Value(const struct ArrowSchema *schema,
6954 : const struct ArrowArray *array,
6955 : size_t iFeature)
6956 : {
6957 90 : uint64_t nVal = 0;
6958 90 : CPLAssert(schema->format[1] == 0);
6959 90 : switch (schema->format[0])
6960 : {
6961 8 : case ARROW_LETTER_INT8:
6962 8 : nVal = GetValue<int8_t>(array, iFeature);
6963 8 : break;
6964 8 : case ARROW_LETTER_UINT8:
6965 8 : nVal = GetValue<uint8_t>(array, iFeature);
6966 8 : break;
6967 8 : case ARROW_LETTER_INT16:
6968 8 : nVal = GetValue<int16_t>(array, iFeature);
6969 8 : break;
6970 8 : case ARROW_LETTER_UINT16:
6971 8 : nVal = GetValue<uint16_t>(array, iFeature);
6972 8 : break;
6973 34 : case ARROW_LETTER_INT32:
6974 34 : nVal = GetValue<int32_t>(array, iFeature);
6975 34 : break;
6976 8 : case ARROW_LETTER_UINT32:
6977 8 : nVal = GetValue<uint32_t>(array, iFeature);
6978 8 : break;
6979 8 : case ARROW_LETTER_INT64:
6980 8 : nVal = GetValue<int64_t>(array, iFeature);
6981 8 : break;
6982 8 : case ARROW_LETTER_UINT64:
6983 8 : nVal = GetValue<uint64_t>(array, iFeature);
6984 8 : break;
6985 0 : default:
6986 : // Shouldn't happen given checks in BuildOGRFieldInfo()
6987 0 : CPLAssert(false);
6988 : break;
6989 : }
6990 90 : return nVal;
6991 : }
6992 :
6993 : /************************************************************************/
6994 : /* GetWorkingBufferSize() */
6995 : /************************************************************************/
6996 :
6997 1381780 : static size_t GetWorkingBufferSize(const struct ArrowSchema *schema,
6998 : const struct ArrowArray *array,
6999 : size_t iFeature, int &iArrowIdxInOut,
7000 : const std::vector<FieldInfo> &asFieldInfo)
7001 : {
7002 1381780 : const char *fieldName = schema->name;
7003 1381780 : const char *format = schema->format;
7004 1381780 : if (IsStructure(format))
7005 : {
7006 60166 : size_t nRet = 0;
7007 1381800 : for (int64_t i = 0; i < array->n_children; ++i)
7008 : {
7009 1321630 : nRet += GetWorkingBufferSize(
7010 1321630 : schema->children[i], array->children[i],
7011 1321630 : iFeature + static_cast<size_t>(array->offset), iArrowIdxInOut,
7012 : asFieldInfo);
7013 : }
7014 60166 : return nRet;
7015 : }
7016 1321620 : const int iArrowIdx = iArrowIdxInOut;
7017 1321620 : ++iArrowIdxInOut;
7018 :
7019 1321620 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7020 121426 : return 0;
7021 :
7022 1200190 : const uint8_t *pabyValidity =
7023 1200190 : static_cast<const uint8_t *>(array->buffers[0]);
7024 1200310 : if (array->null_count != 0 && pabyValidity &&
7025 119 : !TestBit(pabyValidity, static_cast<size_t>(iFeature + array->offset)))
7026 : {
7027 : // empty string
7028 56 : return 0;
7029 : }
7030 :
7031 1200130 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7032 : {
7033 41 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7034 41 : const auto dictArray = array->dictionary;
7035 41 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7036 : {
7037 1 : CPLError(CE_Failure, CPLE_AppDefined,
7038 : "Feature %" PRIu64
7039 : ", field %s: invalid dictionary index: %" PRIu64,
7040 : static_cast<uint64_t>(iFeature), fieldName, nDictIdx);
7041 1 : return 0;
7042 : }
7043 :
7044 40 : array = dictArray;
7045 40 : schema = schema->dictionary;
7046 40 : format = schema->format;
7047 40 : iFeature = static_cast<size_t>(nDictIdx);
7048 : }
7049 :
7050 1200130 : if (IsString(format))
7051 : {
7052 1200120 : const auto *panOffsets =
7053 1200120 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset;
7054 1200120 : return 1 + (panOffsets[iFeature + 1] - panOffsets[iFeature]);
7055 : }
7056 10 : else if (IsLargeString(format))
7057 : {
7058 10 : const auto *panOffsets =
7059 10 : static_cast<const uint64_t *>(array->buffers[1]) + array->offset;
7060 10 : return 1 + static_cast<size_t>(panOffsets[iFeature + 1] -
7061 10 : panOffsets[iFeature]);
7062 : }
7063 0 : return 0;
7064 : }
7065 :
7066 : /************************************************************************/
7067 : /* FillField() */
7068 : /************************************************************************/
7069 :
7070 : template <typename ArrowType, typename OGRType = ArrowType>
7071 210 : inline static void FillField(const struct ArrowArray *array, int iOGRFieldIdx,
7072 : size_t iFeature, OGRFeature &oFeature)
7073 : {
7074 210 : const auto *panValues = static_cast<const ArrowType *>(array->buffers[1]);
7075 210 : oFeature.SetFieldSameTypeUnsafe(
7076 : iOGRFieldIdx,
7077 210 : static_cast<OGRType>(panValues[iFeature + array->offset]));
7078 210 : }
7079 :
7080 : /************************************************************************/
7081 : /* FillFieldString() */
7082 : /************************************************************************/
7083 :
7084 : template <typename OffsetType>
7085 : inline static void
7086 1200130 : FillFieldString(const struct ArrowArray *array, int iOGRFieldIdx,
7087 : size_t iFeature, int iArrowIdx,
7088 : const std::vector<FieldInfo> &asFieldInfo,
7089 : std::string &osWorkingBuffer, OGRFeature &oFeature)
7090 : {
7091 1200130 : const auto *panOffsets =
7092 1200130 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7093 1200130 : const char *pszStr = static_cast<const char *>(array->buffers[2]);
7094 1200130 : const size_t nLen =
7095 1200130 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7096 1200130 : if (asFieldInfo[iArrowIdx].bUseStringOptim)
7097 : {
7098 1200130 : oFeature.SetFieldSameTypeUnsafe(
7099 1200130 : iOGRFieldIdx, &osWorkingBuffer[0] + osWorkingBuffer.size());
7100 1200130 : osWorkingBuffer.append(pszStr + panOffsets[iFeature], nLen);
7101 1200130 : osWorkingBuffer.push_back(0); // append null character
7102 : }
7103 : else
7104 : {
7105 0 : const std::string osTmp(pszStr, nLen);
7106 0 : oFeature.SetField(iOGRFieldIdx, osTmp.c_str());
7107 : }
7108 1200130 : }
7109 :
7110 : /************************************************************************/
7111 : /* FillFieldBinary() */
7112 : /************************************************************************/
7113 :
7114 : template <typename OffsetType>
7115 : inline static bool
7116 60094 : FillFieldBinary(const struct ArrowArray *array, int iOGRFieldIdx,
7117 : size_t iFeature, int iArrowIdx,
7118 : const std::vector<FieldInfo> &asFieldInfo,
7119 : const std::string &osFieldPrefix, const char *pszFieldName,
7120 : OGRFeature &oFeature)
7121 : {
7122 60094 : const auto *panOffsets =
7123 60094 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7124 60094 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]) +
7125 60094 : static_cast<size_t>(panOffsets[iFeature]);
7126 60094 : const size_t nLen =
7127 60094 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7128 60094 : if (asFieldInfo[iArrowIdx].bIsGeomCol)
7129 : {
7130 60066 : size_t nBytesConsumedOut = 0;
7131 :
7132 : // Check if we can reuse the existing geometry, to save dynamic memory
7133 : // allocations.
7134 60066 : if (nLen >= 5 && pabyData[0] == wkbNDR && pabyData[1] <= wkbTriangle &&
7135 60059 : pabyData[2] == 0 && pabyData[3] == 0 && pabyData[4] == 0)
7136 : {
7137 60059 : const auto poExistingGeom = oFeature.GetGeomFieldRef(iOGRFieldIdx);
7138 120076 : if (poExistingGeom &&
7139 60017 : poExistingGeom->getGeometryType() == pabyData[1])
7140 : {
7141 60017 : poExistingGeom->importFromWkb(pabyData, nLen, wkbVariantIso,
7142 : nBytesConsumedOut);
7143 60017 : return true;
7144 : }
7145 : }
7146 :
7147 49 : OGRGeometry *poGeometry = nullptr;
7148 49 : OGRGeometryFactory::createFromWkb(pabyData, nullptr, &poGeometry, nLen,
7149 : wkbVariantIso, nBytesConsumedOut);
7150 49 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, poGeometry);
7151 : }
7152 : else
7153 : {
7154 28 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
7155 : {
7156 0 : CPLError(CE_Failure, CPLE_NotSupported,
7157 : "Content for field %s%s is too large",
7158 : osFieldPrefix.c_str(), pszFieldName);
7159 0 : return false;
7160 : }
7161 28 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(nLen), pabyData);
7162 : }
7163 77 : return true;
7164 : }
7165 :
7166 : /************************************************************************/
7167 : /* FillFeature() */
7168 : /************************************************************************/
7169 :
7170 1321630 : static bool FillFeature(OGRLayer *poLayer, const struct ArrowSchema *schema,
7171 : const struct ArrowArray *array,
7172 : const std::string &osFieldPrefix, size_t iFeature,
7173 : int &iArrowIdxInOut,
7174 : const std::vector<FieldInfo> &asFieldInfo,
7175 : OGRFeature &oFeature, std::string &osWorkingBuffer)
7176 :
7177 : {
7178 1321630 : const char *fieldName = schema->name;
7179 1321630 : const char *format = schema->format;
7180 1321630 : if (IsStructure(format))
7181 : {
7182 38 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
7183 78 : for (int64_t i = 0; i < array->n_children; ++i)
7184 : {
7185 59 : if (!FillFeature(
7186 59 : poLayer, schema->children[i], array->children[i],
7187 59 : osNewPrefix, iFeature + static_cast<size_t>(array->offset),
7188 : iArrowIdxInOut, asFieldInfo, oFeature, osWorkingBuffer))
7189 0 : return false;
7190 : }
7191 19 : return true;
7192 : }
7193 1321620 : const int iArrowIdx = iArrowIdxInOut;
7194 1321620 : ++iArrowIdxInOut;
7195 1321620 : const int iOGRFieldIdx = asFieldInfo[iArrowIdx].iOGRFieldIdx;
7196 :
7197 1321620 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7198 : {
7199 62 : format = schema->dictionary->format;
7200 : }
7201 :
7202 1321620 : if (array->null_count != 0)
7203 : {
7204 997 : const uint8_t *pabyValidity =
7205 997 : static_cast<const uint8_t *>(array->buffers[0]);
7206 1945 : if (pabyValidity &&
7207 948 : !TestBit(pabyValidity,
7208 948 : static_cast<size_t>(iFeature + array->offset)))
7209 : {
7210 287 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7211 5 : oFeature.SetFID(OGRNullFID);
7212 282 : else if (asFieldInfo[iArrowIdx].bIsGeomCol)
7213 60 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, nullptr);
7214 222 : else if (asFieldInfo[iArrowIdx].eSetFeatureFieldType == OFTString)
7215 : {
7216 119 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7217 119 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7218 : {
7219 63 : if (IsValidField(psField))
7220 : {
7221 51 : CPLFree(psField->String);
7222 51 : OGR_RawField_SetNull(psField);
7223 : }
7224 : }
7225 : else
7226 : {
7227 56 : OGR_RawField_SetNull(psField);
7228 : }
7229 : }
7230 : else
7231 : {
7232 103 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7233 103 : switch (asFieldInfo[iArrowIdx].eSetFeatureFieldType)
7234 : {
7235 47 : case OFTRealList:
7236 : case OFTIntegerList:
7237 : case OFTInteger64List:
7238 47 : if (IsValidField(psField))
7239 47 : CPLFree(psField->IntegerList.paList);
7240 47 : break;
7241 :
7242 7 : case OFTStringList:
7243 7 : if (IsValidField(psField))
7244 7 : CSLDestroy(psField->StringList.paList);
7245 7 : break;
7246 :
7247 1 : case OFTBinary:
7248 1 : if (IsValidField(psField))
7249 1 : CPLFree(psField->Binary.paData);
7250 1 : break;
7251 :
7252 48 : default:
7253 48 : break;
7254 : }
7255 103 : OGR_RawField_SetNull(psField);
7256 : }
7257 287 : return true;
7258 : }
7259 : }
7260 :
7261 1321330 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7262 : {
7263 49 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7264 49 : auto dictArray = array->dictionary;
7265 49 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7266 : {
7267 2 : CPLError(CE_Failure, CPLE_AppDefined,
7268 : "Feature %" PRIu64
7269 : ", field %s: invalid dictionary index: %" PRIu64,
7270 : static_cast<uint64_t>(iFeature),
7271 4 : (osFieldPrefix + fieldName).c_str(), nDictIdx);
7272 2 : return false;
7273 : }
7274 47 : array = dictArray;
7275 47 : schema = schema->dictionary;
7276 47 : iFeature = static_cast<size_t>(nDictIdx);
7277 : }
7278 :
7279 1321330 : if (IsBoolean(format))
7280 : {
7281 12 : const uint8_t *pabyValues =
7282 12 : static_cast<const uint8_t *>(array->buffers[1]);
7283 12 : oFeature.SetFieldSameTypeUnsafe(
7284 : iOGRFieldIdx,
7285 12 : TestBit(pabyValues, static_cast<size_t>(iFeature + array->offset))
7286 : ? 1
7287 : : 0);
7288 12 : return true;
7289 : }
7290 1321310 : else if (IsInt8(format))
7291 : {
7292 10 : FillField<int8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7293 10 : return true;
7294 : }
7295 1321300 : else if (IsUInt8(format))
7296 : {
7297 10 : FillField<uint8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7298 10 : return true;
7299 : }
7300 1321290 : else if (IsInt16(format))
7301 : {
7302 12 : FillField<int16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7303 12 : return true;
7304 : }
7305 1321280 : else if (IsUInt16(format))
7306 : {
7307 10 : FillField<uint16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7308 10 : return true;
7309 : }
7310 1321270 : else if (IsInt32(format))
7311 : {
7312 28 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7313 : {
7314 2 : const auto *panValues =
7315 2 : static_cast<const int32_t *>(array->buffers[1]);
7316 2 : oFeature.SetFID(panValues[iFeature + array->offset]);
7317 : }
7318 : else
7319 : {
7320 26 : FillField<int32_t>(array, iOGRFieldIdx, iFeature, oFeature);
7321 : }
7322 28 : return true;
7323 : }
7324 1321240 : else if (IsUInt32(format))
7325 : {
7326 4 : FillField<uint32_t, GIntBig>(array, iOGRFieldIdx, iFeature, oFeature);
7327 4 : return true;
7328 : }
7329 1321240 : else if (IsInt64(format))
7330 : {
7331 60122 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7332 : {
7333 60054 : const auto *panValues =
7334 60054 : static_cast<const int64_t *>(array->buffers[1]);
7335 60054 : oFeature.SetFID(panValues[iFeature + array->offset]);
7336 : }
7337 : else
7338 : {
7339 68 : FillField<int64_t, GIntBig>(array, iOGRFieldIdx, iFeature,
7340 : oFeature);
7341 : }
7342 60122 : return true;
7343 : }
7344 1261120 : else if (IsUInt64(format))
7345 : {
7346 10 : FillField<uint64_t, double>(array, iOGRFieldIdx, iFeature, oFeature);
7347 10 : return true;
7348 : }
7349 1261110 : else if (IsFloat32(format))
7350 : {
7351 12 : FillField<float>(array, iOGRFieldIdx, iFeature, oFeature);
7352 12 : return true;
7353 : }
7354 1261100 : else if (IsFloat64(format))
7355 : {
7356 48 : FillField<double>(array, iOGRFieldIdx, iFeature, oFeature);
7357 48 : return true;
7358 : }
7359 1261050 : else if (IsString(format))
7360 : {
7361 1200120 : FillFieldString<uint32_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7362 : asFieldInfo, osWorkingBuffer, oFeature);
7363 1200120 : return true;
7364 : }
7365 60926 : else if (IsLargeString(format))
7366 : {
7367 10 : FillFieldString<uint64_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7368 : asFieldInfo, osWorkingBuffer, oFeature);
7369 10 : return true;
7370 : }
7371 60916 : else if (IsBinary(format))
7372 : {
7373 60078 : return FillFieldBinary<uint32_t>(array, iOGRFieldIdx, iFeature,
7374 : iArrowIdx, asFieldInfo, osFieldPrefix,
7375 60078 : fieldName, oFeature);
7376 : }
7377 838 : else if (IsLargeBinary(format))
7378 : {
7379 16 : return FillFieldBinary<uint64_t>(array, iOGRFieldIdx, iFeature,
7380 : iArrowIdx, asFieldInfo, osFieldPrefix,
7381 16 : fieldName, oFeature);
7382 : }
7383 822 : else if (asFieldInfo[iArrowIdx].nPrecision > 0)
7384 : {
7385 : // fits on a int64
7386 46 : CPLAssert(asFieldInfo[iArrowIdx].nPrecision <= 19);
7387 : // either 128 or 256 bits
7388 46 : CPLAssert((asFieldInfo[iArrowIdx].nWidthInBytes % 8) == 0);
7389 46 : const int nWidthIn64BitWord = asFieldInfo[iArrowIdx].nWidthInBytes / 8;
7390 :
7391 46 : if (IsList(format))
7392 : {
7393 16 : const auto panOffsets =
7394 16 : static_cast<const uint32_t *>(array->buffers[1]) +
7395 16 : array->offset;
7396 16 : const auto childArray = array->children[0];
7397 16 : std::vector<double> aValues;
7398 33 : for (auto i = panOffsets[iFeature]; i < panOffsets[iFeature + 1];
7399 : ++i)
7400 : {
7401 17 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7402 17 : asFieldInfo[iArrowIdx].nScale,
7403 : i));
7404 : }
7405 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7406 16 : aValues.data());
7407 16 : return true;
7408 : }
7409 30 : else if (IsLargeList(format))
7410 : {
7411 4 : const auto panOffsets =
7412 4 : static_cast<const uint64_t *>(array->buffers[1]) +
7413 4 : array->offset;
7414 4 : const auto childArray = array->children[0];
7415 4 : std::vector<double> aValues;
7416 4 : for (auto i = static_cast<size_t>(panOffsets[iFeature]);
7417 9 : i < static_cast<size_t>(panOffsets[iFeature + 1]); ++i)
7418 : {
7419 5 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7420 5 : asFieldInfo[iArrowIdx].nScale,
7421 : i));
7422 : }
7423 4 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7424 4 : aValues.data());
7425 4 : return true;
7426 : }
7427 26 : else if (IsFixedSizeList(format))
7428 : {
7429 4 : const int nVals = GetFixedSizeList(format);
7430 4 : const auto childArray = array->children[0];
7431 4 : std::vector<double> aValues;
7432 12 : for (int i = 0; i < nVals; ++i)
7433 : {
7434 8 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7435 8 : asFieldInfo[iArrowIdx].nScale,
7436 8 : iFeature * nVals + i));
7437 : }
7438 4 : oFeature.SetField(iOGRFieldIdx, nVals, aValues.data());
7439 4 : return true;
7440 : }
7441 :
7442 22 : CPLAssert(format[0] == ARROW_LETTER_DECIMAL);
7443 :
7444 22 : oFeature.SetFieldSameTypeUnsafe(
7445 : iOGRFieldIdx,
7446 : GetValueDecimal(array, nWidthIn64BitWord,
7447 22 : asFieldInfo[iArrowIdx].nScale, iFeature));
7448 22 : return true;
7449 : }
7450 776 : else if (SetFieldForOtherFormats(
7451 : oFeature, iOGRFieldIdx,
7452 776 : static_cast<size_t>(iFeature + array->offset), schema, array))
7453 : {
7454 776 : return true;
7455 : }
7456 :
7457 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
7458 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
7459 0 : fieldName + " is not supported.")
7460 : .c_str());
7461 0 : return false;
7462 : }
7463 :
7464 : /************************************************************************/
7465 : /* OGRLayer::WriteArrowBatch() */
7466 : /************************************************************************/
7467 :
7468 : // clang-format off
7469 : /** Writes a batch of rows from an ArrowArray.
7470 : *
7471 : * This is semantically close to calling CreateFeature() with multiple features
7472 : * at once.
7473 : *
7474 : * The ArrowArray must be of type struct (format=+s), and its children generally
7475 : * map to a OGR attribute or geometry field (unless they are struct themselves).
7476 : *
7477 : * Method IsArrowSchemaSupported() can be called to determine if the schema
7478 : * will be supported by WriteArrowBatch().
7479 : *
7480 : * OGR fields for the corresponding children arrays must exist and be of a
7481 : * compatible type. For attribute fields, they should generally be created with
7482 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
7483 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
7484 : * they should be created either implicitly at CreateLayer() type
7485 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
7486 : *
7487 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
7488 : * implementation of WriteArrowBatch() for scenarios that involve appending to
7489 : * an already existing output layer when the input Arrow field type and the
7490 : * OGR layer field type are 32/64-bi integers or real number, but do not match
7491 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
7492 : * can be used to control the behavior in case of lossy conversion.
7493 : *
7494 : * Arrays for geometry columns should be of binary or large binary type and
7495 : * contain WKB geometry.
7496 : *
7497 : * Note that the passed array may be set to a released state
7498 : * (array->release==NULL) after this call (not by the base implementation,
7499 : * but in specialized ones such as Parquet or Arrow for example)
7500 : *
7501 : * Supported options of the base implementation are:
7502 : * <ul>
7503 : * <li>FID=name. Name of the FID column in the array. If not provided,
7504 : * GetFIDColumn() is used to determine it. The special name
7505 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
7506 : * GetFIDColumn() are set.
7507 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
7508 : * On input, values of the FID column are used to create the feature.
7509 : * On output, the values of the FID column may be set with the FID of the
7510 : * created feature (if the array is not released).
7511 : * </li>
7512 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
7513 : * input FID is not preserved in the output layer. The default is NOTHING.
7514 : * Setting it to ERROR will cause the function to error out. Setting it
7515 : * to WARNING will cause the function to emit a warning but continue its
7516 : * processing.
7517 : * </li>
7518 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
7519 : * Action to perform when the input field value is not preserved in the
7520 : * output layer.
7521 : * The default is WARNING, which will cause the function to emit a warning
7522 : * but continue its processing.
7523 : * Setting it to ERROR will cause the function to error out if a lossy
7524 : * conversion is detected.
7525 : * </li>
7526 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
7527 : * GetGeometryColumn() is used. The special name
7528 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
7529 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
7530 : * Geometry columns are also identified if they have
7531 : * ARROW:extension:name=ogc.wkb as a field metadata.
7532 : * The corresponding ArrowArray must be of type binary (w) or large
7533 : * binary (W).
7534 : * </li>
7535 : * </ul>
7536 : *
7537 : * The following example demonstrates how to copy a layer from one format to
7538 : * another one (assuming it has at most a single geometry column):
7539 : \code{.py}
7540 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
7541 : stream = src_lyr.GetArrowStream()
7542 : schema = stream.GetSchema()
7543 :
7544 : # If the source layer has a FID column and the output driver supports
7545 : # a FID layer creation option, set it to the source FID column name.
7546 : if src_lyr.GetFIDColumn():
7547 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
7548 : "DS_LAYER_CREATIONOPTIONLIST"
7549 : )
7550 : if creationOptions and '"FID"' in creationOptions:
7551 : lcos["FID"] = src_lyr.GetFIDColumn()
7552 :
7553 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
7554 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
7555 : out_lyr = out_ds.CreateLayer(
7556 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
7557 : )
7558 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
7559 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
7560 : else:
7561 : out_lyr = out_ds.CreateLayer(
7562 : src_lyr.GetName(),
7563 : geom_type=src_lyr.GetGeomType(),
7564 : srs=src_lyr.GetSpatialRef(),
7565 : options=lcos,
7566 : )
7567 :
7568 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
7569 : assert success, error_msg
7570 :
7571 : src_geom_field_names = [
7572 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
7573 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
7574 : ]
7575 : for i in range(schema.GetChildrenCount()):
7576 : # GetArrowStream() may return "OGC_FID" for a unnamed source FID
7577 : # column and "wkb_geometry" for a unnamed source geometry column.
7578 : # Also test GetFIDColumn() and src_geom_field_names if they are
7579 : # named.
7580 : if (
7581 : schema.GetChild(i).GetName()
7582 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
7583 : and schema.GetChild(i).GetName() not in src_geom_field_names
7584 : ):
7585 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
7586 :
7587 : write_options = []
7588 : if src_lyr.GetFIDColumn():
7589 : write_options.append("FID=" + src_lyr.GetFIDColumn())
7590 : if (
7591 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
7592 : and src_lyr.GetGeometryColumn()
7593 : ):
7594 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
7595 :
7596 : while True:
7597 : array = stream.GetNextRecordBatch()
7598 : if array is None:
7599 : break
7600 : out_lyr.WriteArrowBatch(schema, array, write_options)
7601 : \endcode
7602 : *
7603 : * This method and CreateFeature() are mutually exclusive in the same session.
7604 : *
7605 : * This method is the same as the C function OGR_L_WriteArrowBatch().
7606 : *
7607 : * @param schema Schema of array
7608 : * @param array Array of type struct. It may be released (array->release==NULL)
7609 : * after calling this method.
7610 : * @param papszOptions Options. Null terminated list, or nullptr.
7611 : * @return true in case of success
7612 : * @since 3.8
7613 : */
7614 : // clang-format on
7615 :
7616 79 : bool OGRLayer::WriteArrowBatch(const struct ArrowSchema *schema,
7617 : struct ArrowArray *array,
7618 : CSLConstList papszOptions)
7619 : {
7620 79 : const char *format = schema->format;
7621 79 : if (!IsStructure(format))
7622 : {
7623 0 : CPLError(CE_Failure, CPLE_AppDefined,
7624 : "WriteArrowBatch() should be called on a schema that is a "
7625 : "struct of fields");
7626 0 : return false;
7627 : }
7628 :
7629 79 : if (schema->n_children != array->n_children)
7630 : {
7631 0 : CPLError(CE_Failure, CPLE_AppDefined,
7632 : "WriteArrowBatch(): schema->n_children (%d) != "
7633 : "array->n_children (%d)",
7634 0 : int(schema->n_children), int(array->n_children));
7635 0 : return false;
7636 : }
7637 :
7638 158 : CPLStringList aosNativeTypes;
7639 79 : auto poDS = const_cast<OGRLayer *>(this)->GetDataset();
7640 79 : if (poDS)
7641 : {
7642 79 : auto poDriver = poDS->GetDriver();
7643 79 : if (poDriver)
7644 : {
7645 : const char *pszMetadataItem =
7646 79 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
7647 79 : if (pszMetadataItem)
7648 79 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
7649 : }
7650 : }
7651 :
7652 158 : std::vector<FieldInfo> asFieldInfo;
7653 79 : auto poLayerDefn = GetLayerDefn();
7654 : const char *pszFIDName =
7655 79 : CSLFetchNameValueDef(papszOptions, "FID", GetFIDColumn());
7656 79 : if (!pszFIDName || pszFIDName[0] == 0)
7657 59 : pszFIDName = DEFAULT_ARROW_FID_NAME;
7658 : const bool bErrorIfFIDNotPreserved =
7659 79 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
7660 : "ERROR");
7661 : const bool bWarningIfFIDNotPreserved =
7662 79 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
7663 : "WARNING");
7664 : const bool bErrorIfFieldNotPreserved =
7665 79 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FIELD_NOT_PRESERVED", ""),
7666 : "ERROR");
7667 79 : const char *pszGeomFieldName = CSLFetchNameValueDef(
7668 79 : papszOptions, "GEOMETRY_NAME", GetGeometryColumn());
7669 79 : if (!pszGeomFieldName || pszGeomFieldName[0] == 0)
7670 58 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
7671 79 : const struct ArrowSchema *schemaFIDColumn = nullptr;
7672 79 : struct ArrowArray *arrayFIDColumn = nullptr;
7673 79 : bool bFallbackTypesUsed = false;
7674 803 : for (int64_t i = 0; i < schema->n_children; ++i)
7675 : {
7676 725 : if (!BuildOGRFieldInfo(schema->children[i], array->children[i],
7677 725 : poLayerDefn, std::string(), aosNativeTypes,
7678 : bFallbackTypesUsed, asFieldInfo, pszFIDName,
7679 : pszGeomFieldName, this,
7680 725 : m_poPrivate->m_oMapArrowFieldNameToOGRFieldName,
7681 : schemaFIDColumn, arrayFIDColumn))
7682 : {
7683 1 : return false;
7684 : }
7685 : }
7686 :
7687 156 : std::map<int, int> oMapOGRFieldIndexToFieldInfoIndex;
7688 156 : std::vector<bool> abUseStringOptim(poLayerDefn->GetFieldCount(), false);
7689 822 : for (int i = 0; i < static_cast<int>(asFieldInfo.size()); ++i)
7690 : {
7691 744 : if (asFieldInfo[i].iOGRFieldIdx >= 0 && !asFieldInfo[i].bIsGeomCol)
7692 : {
7693 645 : CPLAssert(oMapOGRFieldIndexToFieldInfoIndex.find(
7694 : asFieldInfo[i].iOGRFieldIdx) ==
7695 : oMapOGRFieldIndexToFieldInfoIndex.end());
7696 645 : oMapOGRFieldIndexToFieldInfoIndex[asFieldInfo[i].iOGRFieldIdx] = i;
7697 1290 : abUseStringOptim[asFieldInfo[i].iOGRFieldIdx] =
7698 1290 : asFieldInfo[i].bUseStringOptim;
7699 : }
7700 : }
7701 :
7702 156 : OGRFeatureDefn oLayerDefnTmp(poLayerDefn->GetName());
7703 :
7704 : struct LayerDefnTmpRefReleaser
7705 : {
7706 : OGRFeatureDefn &m_oDefn;
7707 :
7708 78 : explicit LayerDefnTmpRefReleaser(OGRFeatureDefn &oDefn) : m_oDefn(oDefn)
7709 : {
7710 78 : m_oDefn.Reference();
7711 78 : }
7712 :
7713 78 : ~LayerDefnTmpRefReleaser()
7714 78 : {
7715 78 : m_oDefn.Dereference();
7716 78 : }
7717 : };
7718 :
7719 156 : LayerDefnTmpRefReleaser oLayerDefnTmpRefReleaser(oLayerDefnTmp);
7720 :
7721 156 : std::vector<int> anIdentityFieldMap;
7722 78 : if (bFallbackTypesUsed)
7723 : {
7724 29 : oLayerDefnTmp.SetGeomType(wkbNone);
7725 98 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
7726 : {
7727 69 : anIdentityFieldMap.push_back(i);
7728 69 : const auto poSrcFieldDefn = poLayerDefn->GetFieldDefn(i);
7729 69 : const auto oIter = oMapOGRFieldIndexToFieldInfoIndex.find(i);
7730 : OGRFieldDefn oFieldDefn(
7731 : poSrcFieldDefn->GetNameRef(),
7732 69 : oIter == oMapOGRFieldIndexToFieldInfoIndex.end()
7733 1 : ? poSrcFieldDefn->GetType()
7734 139 : : asFieldInfo[oIter->second].eNominalFieldType);
7735 69 : if (oIter != oMapOGRFieldIndexToFieldInfoIndex.end())
7736 68 : asFieldInfo[oIter->second].eSetFeatureFieldType =
7737 68 : asFieldInfo[oIter->second].eNominalFieldType;
7738 69 : oLayerDefnTmp.AddFieldDefn(&oFieldDefn);
7739 : }
7740 57 : for (int i = 0; i < poLayerDefn->GetGeomFieldCount(); ++i)
7741 : {
7742 28 : oLayerDefnTmp.AddGeomFieldDefn(poLayerDefn->GetGeomFieldDefn(i));
7743 : }
7744 : }
7745 : else
7746 : {
7747 694 : for (auto &sFieldInfo : asFieldInfo)
7748 645 : sFieldInfo.eSetFeatureFieldType = sFieldInfo.eTargetFieldType;
7749 : }
7750 :
7751 : struct FeatureCleaner
7752 : {
7753 : OGRFeature &m_oFeature;
7754 : const std::vector<bool> &m_abUseStringOptim;
7755 :
7756 78 : explicit FeatureCleaner(OGRFeature &oFeature,
7757 : const std::vector<bool> &abUseStringOptim)
7758 78 : : m_oFeature(oFeature), m_abUseStringOptim(abUseStringOptim)
7759 : {
7760 78 : }
7761 :
7762 : // As we set a value that can't be CPLFree()'d in the .String member
7763 : // of string fields, we must take care of manually unsetting it before
7764 : // the destructor of OGRFeature gets called.
7765 78 : ~FeatureCleaner()
7766 78 : {
7767 78 : const auto poLayerDefn = m_oFeature.GetDefnRef();
7768 78 : const int nFieldCount = poLayerDefn->GetFieldCount();
7769 727 : for (int i = 0; i < nFieldCount; ++i)
7770 : {
7771 649 : if (m_abUseStringOptim[i])
7772 : {
7773 126 : if (m_oFeature.IsFieldSetAndNotNullUnsafe(i))
7774 98 : m_oFeature.SetFieldSameTypeUnsafe(
7775 : i, static_cast<char *>(nullptr));
7776 : }
7777 : }
7778 78 : }
7779 : };
7780 :
7781 156 : OGRFeature oFeature(bFallbackTypesUsed ? &oLayerDefnTmp : poLayerDefn);
7782 156 : FeatureCleaner oCleaner(oFeature, abUseStringOptim);
7783 156 : OGRFeature oFeatureTarget(poLayerDefn);
7784 78 : OGRFeature *const poFeatureTarget =
7785 78 : bFallbackTypesUsed ? &oFeatureTarget : &oFeature;
7786 :
7787 : // We accumulate the content of all strings in osWorkingBuffer to avoid
7788 : // a few dynamic memory allocations
7789 156 : std::string osWorkingBuffer;
7790 :
7791 : bool bTransactionOK;
7792 : {
7793 78 : CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler);
7794 78 : bTransactionOK = StartTransaction() == OGRERR_NONE;
7795 : }
7796 :
7797 156 : const std::string emptyString;
7798 78 : int64_t fidNullCount = 0;
7799 60215 : for (size_t iFeature = 0; iFeature < static_cast<size_t>(array->length);
7800 : ++iFeature)
7801 : {
7802 60147 : oFeature.SetFID(OGRNullFID);
7803 :
7804 60147 : int iArrowIdx = 0;
7805 60147 : const size_t nWorkingBufferSize = GetWorkingBufferSize(
7806 : schema, array, iFeature, iArrowIdx, asFieldInfo);
7807 60147 : osWorkingBuffer.clear();
7808 60147 : osWorkingBuffer.reserve(nWorkingBufferSize);
7809 : #ifdef DEBUG
7810 60147 : const char *pszWorkingBuffer = osWorkingBuffer.c_str();
7811 60147 : CPL_IGNORE_RET_VAL(pszWorkingBuffer);
7812 : #endif
7813 60147 : iArrowIdx = 0;
7814 1381720 : for (int64_t i = 0; i < schema->n_children; ++i)
7815 : {
7816 1321580 : if (!FillFeature(this, schema->children[i], array->children[i],
7817 : emptyString, iFeature, iArrowIdx, asFieldInfo,
7818 : oFeature, osWorkingBuffer))
7819 : {
7820 2 : if (bTransactionOK)
7821 2 : RollbackTransaction();
7822 10 : return false;
7823 : }
7824 : }
7825 : #ifdef DEBUG
7826 : // Check that the buffer didn't get reallocated
7827 60145 : CPLAssert(pszWorkingBuffer == osWorkingBuffer.c_str());
7828 60145 : CPLAssert(osWorkingBuffer.size() == nWorkingBufferSize);
7829 : #endif
7830 :
7831 60145 : if (bFallbackTypesUsed)
7832 : {
7833 44 : oFeatureTarget.SetFrom(&oFeature, anIdentityFieldMap.data(),
7834 : /*bForgiving=*/true,
7835 : /*bUseISO8601ForDateTimeAsString=*/true);
7836 44 : oFeatureTarget.SetFID(oFeature.GetFID());
7837 :
7838 44 : if (bErrorIfFieldNotPreserved)
7839 : {
7840 26 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
7841 : {
7842 16 : if (!oFeature.IsFieldSetAndNotNullUnsafe(i))
7843 : {
7844 4 : continue;
7845 : }
7846 12 : bool bLossyConversion = false;
7847 : const auto eSrcType =
7848 12 : oLayerDefnTmp.GetFieldDefnUnsafe(i)->GetType();
7849 : const auto eDstType =
7850 12 : poLayerDefn->GetFieldDefnUnsafe(i)->GetType();
7851 :
7852 : const auto IsDoubleCastToInt64EqualTInt64 =
7853 2 : [](double dfVal, int64_t nOtherVal)
7854 : {
7855 : // Values in the range [INT64_MAX - 1023, INT64_MAX - 1]
7856 : // get converted to a double that once cast to int64_t
7857 : // is INT64_MAX + 1, hence the strict < comparison
7858 : return dfVal >=
7859 2 : static_cast<double>(
7860 2 : std::numeric_limits<int64_t>::min()) &&
7861 : dfVal <
7862 2 : static_cast<double>(
7863 4 : std::numeric_limits<int64_t>::max()) &&
7864 3 : static_cast<int64_t>(dfVal) == nOtherVal;
7865 : };
7866 :
7867 14 : if (eSrcType == OFTInteger64 && eDstType == OFTInteger &&
7868 2 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
7869 2 : oFeature.GetFieldAsInteger64Unsafe(i))
7870 : {
7871 1 : bLossyConversion = true;
7872 : }
7873 14 : else if (eSrcType == OFTReal && eDstType == OFTInteger &&
7874 3 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
7875 3 : oFeature.GetFieldAsDoubleUnsafe(i))
7876 : {
7877 2 : bLossyConversion = true;
7878 : }
7879 12 : else if (eSrcType == OFTReal && eDstType == OFTInteger64 &&
7880 3 : static_cast<double>(
7881 3 : oFeatureTarget.GetFieldAsInteger64Unsafe(i)) !=
7882 3 : oFeature.GetFieldAsDoubleUnsafe(i))
7883 : {
7884 2 : bLossyConversion = true;
7885 : }
7886 9 : else if (eSrcType == OFTInteger64 && eDstType == OFTReal &&
7887 2 : !IsDoubleCastToInt64EqualTInt64(
7888 : oFeatureTarget.GetFieldAsDoubleUnsafe(i),
7889 2 : oFeature.GetFieldAsInteger64Unsafe(i)))
7890 : {
7891 1 : bLossyConversion = true;
7892 : }
7893 12 : if (bLossyConversion)
7894 : {
7895 6 : CPLError(CE_Failure, CPLE_AppDefined,
7896 : "For feature " CPL_FRMT_GIB
7897 : ", value of field %s cannot not preserved",
7898 : oFeatureTarget.GetFID(),
7899 : oLayerDefnTmp.GetFieldDefn(i)->GetNameRef());
7900 6 : if (bTransactionOK)
7901 6 : RollbackTransaction();
7902 6 : return false;
7903 : }
7904 : }
7905 : }
7906 : }
7907 :
7908 60139 : const auto nInputFID = poFeatureTarget->GetFID();
7909 60139 : if (CreateFeature(poFeatureTarget) != OGRERR_NONE)
7910 : {
7911 1 : if (bTransactionOK)
7912 1 : RollbackTransaction();
7913 1 : return false;
7914 : }
7915 60138 : if (nInputFID != OGRNullFID)
7916 : {
7917 120087 : if (bWarningIfFIDNotPreserved &&
7918 : // cppcheck-suppress knownConditionTrueFalse
7919 60032 : poFeatureTarget->GetFID() != nInputFID)
7920 : {
7921 2 : CPLError(CE_Warning, CPLE_AppDefined,
7922 : "Feature id " CPL_FRMT_GIB " not preserved",
7923 : nInputFID);
7924 : }
7925 60054 : else if (bErrorIfFIDNotPreserved &&
7926 : // cppcheck-suppress knownConditionTrueFalse
7927 1 : poFeatureTarget->GetFID() != nInputFID)
7928 : {
7929 1 : CPLError(CE_Failure, CPLE_AppDefined,
7930 : "Feature id " CPL_FRMT_GIB " not preserved",
7931 : nInputFID);
7932 1 : if (bTransactionOK)
7933 1 : RollbackTransaction();
7934 1 : return false;
7935 : }
7936 : }
7937 :
7938 60137 : if (arrayFIDColumn)
7939 : {
7940 60059 : uint8_t *pabyValidity = static_cast<uint8_t *>(
7941 60059 : const_cast<void *>(arrayFIDColumn->buffers[0]));
7942 60059 : if (IsInt32(schemaFIDColumn->format))
7943 : {
7944 6 : auto *panValues = static_cast<int32_t *>(
7945 6 : const_cast<void *>(arrayFIDColumn->buffers[1]));
7946 6 : if (poFeatureTarget->GetFID() >
7947 6 : std::numeric_limits<int32_t>::max())
7948 : {
7949 0 : if (pabyValidity)
7950 : {
7951 0 : ++fidNullCount;
7952 0 : UnsetBit(pabyValidity,
7953 0 : static_cast<size_t>(iFeature +
7954 0 : arrayFIDColumn->offset));
7955 : }
7956 0 : CPLError(CE_Warning, CPLE_AppDefined,
7957 : "FID " CPL_FRMT_GIB
7958 : " cannot be stored in FID array of type int32",
7959 : poFeatureTarget->GetFID());
7960 : }
7961 : else
7962 : {
7963 6 : if (pabyValidity)
7964 : {
7965 5 : SetBit(pabyValidity,
7966 5 : static_cast<size_t>(iFeature +
7967 5 : arrayFIDColumn->offset));
7968 : }
7969 6 : panValues[iFeature + arrayFIDColumn->offset] =
7970 6 : static_cast<int32_t>(poFeatureTarget->GetFID());
7971 : }
7972 : }
7973 60053 : else if (IsInt64(schemaFIDColumn->format))
7974 : {
7975 60053 : if (pabyValidity)
7976 : {
7977 0 : SetBit(
7978 : pabyValidity,
7979 0 : static_cast<size_t>(iFeature + arrayFIDColumn->offset));
7980 : }
7981 60053 : auto *panValues = static_cast<int64_t *>(
7982 60053 : const_cast<void *>(arrayFIDColumn->buffers[1]));
7983 60053 : panValues[iFeature + arrayFIDColumn->offset] =
7984 60053 : poFeatureTarget->GetFID();
7985 : }
7986 : else
7987 : {
7988 0 : CPLAssert(false);
7989 : }
7990 : }
7991 : }
7992 68 : if (arrayFIDColumn && arrayFIDColumn->buffers[0])
7993 : {
7994 1 : arrayFIDColumn->null_count = fidNullCount;
7995 : }
7996 :
7997 68 : bool bRet = true;
7998 68 : if (bTransactionOK)
7999 62 : bRet = CommitTransaction() == OGRERR_NONE;
8000 :
8001 68 : return bRet;
8002 : }
8003 :
8004 : /************************************************************************/
8005 : /* OGR_L_WriteArrowBatch() */
8006 : /************************************************************************/
8007 :
8008 : // clang-format off
8009 : /** Writes a batch of rows from an ArrowArray.
8010 : *
8011 : * This is semantically close to calling CreateFeature() with multiple features
8012 : * at once.
8013 : *
8014 : * The ArrowArray must be of type struct (format=+s), and its children generally
8015 : * map to an OGR attribute or geometry field (unless they are struct themselves).
8016 : *
8017 : * Method IsArrowSchemaSupported() can be called to determine if the schema
8018 : * will be supported by WriteArrowBatch().
8019 : *
8020 : * OGR fields for the corresponding children arrays must exist and be of a
8021 : * compatible type. For attribute fields, they should generally be created with
8022 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
8023 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
8024 : * they should be created either implicitly at CreateLayer() time
8025 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
8026 : *
8027 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
8028 : * implementation of WriteArrowBatch() for scenarios that involve appending to
8029 : * an already existing output layer when the input Arrow field type and the
8030 : * OGR layer field type are 32/64-bit integers or real numbers, but do not match
8031 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
8032 : * can be used to control the behavior in case of lossy conversion.
8033 : *
8034 : * Arrays for geometry columns should be of binary or large binary type and
8035 : * contain WKB geometry.
8036 : *
8037 : * Note that the passed array may be set to a released state
8038 : * (array->release==NULL) after this call (not by the base implementation,
8039 : * but in specialized ones such as Parquet or Arrow for example)
8040 : *
8041 : * Supported options of the base implementation are:
8042 : * <ul>
8043 : * <li>FID=name. Name of the FID column in the array. If not provided,
8044 : * GetFIDColumn() is used to determine it. The special name
8045 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
8046 : * GetFIDColumn() are set.
8047 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
8048 : * On input, values of the FID column are used to create the feature.
8049 : * On output, the values of the FID column may be set with the FID of the
8050 : * created feature (if the array is not released).
8051 : * </li>
8052 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
8053 : * input FID is not preserved in the output layer. The default is NOTHING.
8054 : * Setting it to ERROR will cause the function to error out. Setting it
8055 : * to WARNING will cause the function to emit a warning but continue its
8056 : * processing.
8057 : * </li>
8058 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
8059 : * Action to perform when the input field value is not preserved in the
8060 : * output layer.
8061 : * The default is WARNING, which will cause the function to emit a warning
8062 : * but continue its processing.
8063 : * Setting it to ERROR will cause the function to error out if a lossy
8064 : * conversion is detected.
8065 : * </li>
8066 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
8067 : * GetGeometryColumn() is used. The special name
8068 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
8069 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
8070 : * Geometry columns are also identified if they have
8071 : * ARROW:extension:name=ogc.wkb as a field metadata.
8072 : * The corresponding ArrowArray must be of type binary (w) or large
8073 : * binary (W).
8074 : * </li>
8075 : * </ul>
8076 : *
8077 : * The following example demonstrates how to copy a layer from one format to
8078 : * another one (assuming it has at most a single geometry column):
8079 : \code{.py}
8080 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
8081 : stream = src_lyr.GetArrowStream()
8082 : schema = stream.GetSchema()
8083 :
8084 : # If the source layer has a FID column and the output driver supports
8085 : # a FID layer creation option, set it to the source FID column name.
8086 : if src_lyr.GetFIDColumn():
8087 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
8088 : "DS_LAYER_CREATIONOPTIONLIST"
8089 : )
8090 : if creationOptions and '"FID"' in creationOptions:
8091 : lcos["FID"] = src_lyr.GetFIDColumn()
8092 :
8093 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
8094 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
8095 : out_lyr = out_ds.CreateLayer(
8096 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
8097 : )
8098 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
8099 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
8100 : else:
8101 : out_lyr = out_ds.CreateLayer(
8102 : src_lyr.GetName(),
8103 : geom_type=src_lyr.GetGeomType(),
8104 : srs=src_lyr.GetSpatialRef(),
8105 : options=lcos,
8106 : )
8107 :
8108 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
8109 : assert success, error_msg
8110 :
8111 : src_geom_field_names = [
8112 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
8113 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
8114 : ]
8115 : for i in range(schema.GetChildrenCount()):
8116 : # GetArrowStream() may return "OGC_FID" for an unnamed source FID
8117 : # column and "wkb_geometry" for an unnamed source geometry column.
8118 : # Also test GetFIDColumn() and src_geom_field_names if they are
8119 : # named.
8120 : if (
8121 : schema.GetChild(i).GetName()
8122 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
8123 : and schema.GetChild(i).GetName() not in src_geom_field_names
8124 : ):
8125 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
8126 :
8127 : write_options = []
8128 : if src_lyr.GetFIDColumn():
8129 : write_options.append("FID=" + src_lyr.GetFIDColumn())
8130 : if (
8131 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
8132 : and src_lyr.GetGeometryColumn()
8133 : ):
8134 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
8135 :
8136 : while True:
8137 : array = stream.GetNextRecordBatch()
8138 : if array is None:
8139 : break
8140 : out_lyr.WriteArrowBatch(schema, array, write_options)
8141 : \endcode
8142 : *
8143 : * This method and CreateFeature() are mutually exclusive in the same session.
8144 : *
8145 : * This method is the same as the C++ method OGRLayer::WriteArrowBatch().
8146 : *
8147 : * @param hLayer Layer.
8148 : * @param schema Schema of array.
8149 : * @param array Array of type struct. It may be released (array->release==NULL)
8150 : * after calling this method.
8151 : * @param papszOptions Options. Null terminated list, or nullptr.
8152 : * @return true in case of success
8153 : * @since 3.8
8154 : */
8155 : // clang-format on
8156 :
// C API entry point: resolves hLayer to its OGRLayer and forwards the call to
// OGRLayer::WriteArrowBatch() with the same schema, array and options.
8157 59 : bool OGR_L_WriteArrowBatch(OGRLayerH hLayer, const struct ArrowSchema *schema,
8158 : struct ArrowArray *array, char **papszOptions)
8159 : {
// hLayer, schema and array each go through VALIDATE_POINTER1 with `false` as
// the fallback value. NOTE(review): the macro is defined outside this chunk;
// presumably it reports the null pointer and returns `false` - confirm.
// papszOptions is deliberately not checked: it is forwarded as-is and the
// function's documentation allows it to be nullptr.
8160 59 : VALIDATE_POINTER1(hLayer, __func__, false);
8161 59 : VALIDATE_POINTER1(schema, __func__, false);
8162 59 : VALIDATE_POINTER1(array, __func__, false);
8163 :
// Delegate to the C++ method; its boolean result is returned unchanged.
8164 118 : return OGRLayer::FromHandle(hLayer)->WriteArrowBatch(schema, array,
8165 59 : papszOptions);
8166 : }
|