Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: OpenGIS Simple Features Reference Implementation
4 : * Purpose: Parts of OGRLayer dealing with Arrow C interface
5 : * Author: Even Rouault, <even dot rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022-2023, Even Rouault <even dot rouault at spatialys.com>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #include "ogrsf_frmts.h"
14 : #include "ogr_api.h"
15 : #include "ogr_recordbatch.h"
16 : #include "ograrrowarrayhelper.h"
17 : #include "ogrlayerarrow.h"
18 : #include "ogr_p.h"
19 : #include "ogr_swq.h"
20 : #include "ogr_wkb.h"
21 : #include "ogr_p.h"
22 : #include "ogrlayer_private.h"
23 :
24 : #include "cpl_float.h"
25 : #include "cpl_json.h"
26 : #include "cpl_time.h"
27 : #include <cassert>
28 : #include <cinttypes>
29 : #include <limits>
30 : #include <utility>
31 : #include <set>
32 :
// Keys of the OGR field properties that GetArrowSchema() serializes into the
// ArrowSchema::metadata of the corresponding field child.
constexpr const char *MD_GDAL_OGR_ALTERNATIVE_NAME =
    "GDAL:OGR:alternative_name";
constexpr const char *MD_GDAL_OGR_COMMENT = "GDAL:OGR:comment";
constexpr const char *MD_GDAL_OGR_DEFAULT = "GDAL:OGR:default";
constexpr const char *MD_GDAL_OGR_SUBTYPE = "GDAL:OGR:subtype";
constexpr const char *MD_GDAL_OGR_WIDTH = "GDAL:OGR:width";
constexpr const char *MD_GDAL_OGR_UNIQUE = "GDAL:OGR:unique";
constexpr const char *MD_GDAL_OGR_DOMAIN_NAME = "GDAL:OGR:domain_name";

// First character of the ArrowSchema::format strings recognized by the
// Is*() helpers below.
constexpr char ARROW_LETTER_BOOLEAN = 'b';
constexpr char ARROW_LETTER_INT8 = 'c';
constexpr char ARROW_LETTER_UINT8 = 'C';
constexpr char ARROW_LETTER_INT16 = 's';
constexpr char ARROW_LETTER_UINT16 = 'S';
constexpr char ARROW_LETTER_INT32 = 'i';
constexpr char ARROW_LETTER_UINT32 = 'I';
constexpr char ARROW_LETTER_INT64 = 'l';
constexpr char ARROW_LETTER_UINT64 = 'L';
constexpr char ARROW_LETTER_FLOAT16 = 'e';
constexpr char ARROW_LETTER_FLOAT32 = 'f';
constexpr char ARROW_LETTER_FLOAT64 = 'g';
constexpr char ARROW_LETTER_STRING = 'u';
constexpr char ARROW_LETTER_LARGE_STRING = 'U';
constexpr char ARROW_LETTER_BINARY = 'z';
constexpr char ARROW_LETTER_LARGE_BINARY = 'Z';
// Decimal formats are this letter followed by ':' (see IsDecimal()).
constexpr char ARROW_LETTER_DECIMAL = 'd';
// Second character, following '+', of the list format strings
// (see IsList() and IsLargeList()).
constexpr char ARROW_2ND_LETTER_LIST = 'l';
constexpr char ARROW_2ND_LETTER_LARGE_LIST = 'L';
61 :
/** Return whether the Arrow format string is exactly "+s". */
static inline bool IsStructure(const char *format)
{
    return strcmp(format, "+s") == 0;
}
66 :
/** Return whether the Arrow format string is exactly "+m". */
static inline bool IsMap(const char *format)
{
    return strcmp(format, "+m") == 0;
}
71 :
/** Return whether the Arrow format string starts with "w:". */
static inline bool IsFixedWidthBinary(const char *format)
{
    return strncmp(format, "w:", 2) == 0;
}
76 :
/** Parse with atoi() the number that follows the "w:" prefix of the format
 * string.
 *
 * The caller is expected to have checked the prefix (see
 * IsFixedWidthBinary()): the first two characters are skipped unchecked.
 */
static inline int GetFixedWithBinary(const char *format)
{
    constexpr size_t nPrefixLen = sizeof("w:") - 1;
    return atoi(&format[nPrefixLen]);
}
81 :
82 30285 : static inline bool IsList(const char *format)
83 : {
84 36406 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LIST &&
85 36406 : format[2] == 0;
86 : }
87 :
88 20198 : static inline bool IsLargeList(const char *format)
89 : {
90 20322 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LARGE_LIST &&
91 20322 : format[2] == 0;
92 : }
93 :
/** Return whether the Arrow format string starts with "+w:". */
static inline bool IsFixedSizeList(const char *format)
{
    return strncmp(format, "+w:", 3) == 0;
}
98 :
/** Parse with atoi() the number that follows the "+w:" prefix of the format
 * string.
 *
 * The caller is expected to have checked the prefix (see IsFixedSizeList()):
 * the first three characters are skipped unchecked.
 */
static inline int GetFixedSizeList(const char *format)
{
    constexpr size_t nPrefixLen = sizeof("+w:") - 1;
    return atoi(&format[nPrefixLen]);
}
103 :
104 2746 : static inline bool IsDecimal(const char *format)
105 : {
106 2746 : return format[0] == ARROW_LETTER_DECIMAL && format[1] == ':';
107 : }
108 :
109 1341130 : static inline bool IsBoolean(const char *format)
110 : {
111 1341130 : return format[0] == ARROW_LETTER_BOOLEAN && format[1] == 0;
112 : }
113 :
114 1337880 : static inline bool IsInt8(const char *format)
115 : {
116 1337880 : return format[0] == ARROW_LETTER_INT8 && format[1] == 0;
117 : }
118 :
119 1338010 : static inline bool IsUInt8(const char *format)
120 : {
121 1338010 : return format[0] == ARROW_LETTER_UINT8 && format[1] == 0;
122 : }
123 :
124 1336590 : static inline bool IsInt16(const char *format)
125 : {
126 1336590 : return format[0] == ARROW_LETTER_INT16 && format[1] == 0;
127 : }
128 :
129 1336690 : static inline bool IsUInt16(const char *format)
130 : {
131 1336690 : return format[0] == ARROW_LETTER_UINT16 && format[1] == 0;
132 : }
133 :
134 1395690 : static inline bool IsInt32(const char *format)
135 : {
136 1395690 : return format[0] == ARROW_LETTER_INT32 && format[1] == 0;
137 : }
138 :
139 1335270 : static inline bool IsUInt32(const char *format)
140 : {
141 1335270 : return format[0] == ARROW_LETTER_UINT32 && format[1] == 0;
142 : }
143 :
144 1388620 : static inline bool IsInt64(const char *format)
145 : {
146 1388620 : return format[0] == ARROW_LETTER_INT64 && format[1] == 0;
147 : }
148 :
149 1268030 : static inline bool IsUInt64(const char *format)
150 : {
151 1268030 : return format[0] == ARROW_LETTER_UINT64 && format[1] == 0;
152 : }
153 :
154 14922 : static inline bool IsFloat16(const char *format)
155 : {
156 14922 : return format[0] == ARROW_LETTER_FLOAT16 && format[1] == 0;
157 : }
158 :
159 1274440 : static inline bool IsFloat32(const char *format)
160 : {
161 1274440 : return format[0] == ARROW_LETTER_FLOAT32 && format[1] == 0;
162 : }
163 :
164 1266100 : static inline bool IsFloat64(const char *format)
165 : {
166 1266100 : return format[0] == ARROW_LETTER_FLOAT64 && format[1] == 0;
167 : }
168 :
169 2484810 : static inline bool IsString(const char *format)
170 : {
171 2484810 : return format[0] == ARROW_LETTER_STRING && format[1] == 0;
172 : }
173 :
174 73718 : static inline bool IsLargeString(const char *format)
175 : {
176 73718 : return format[0] == ARROW_LETTER_LARGE_STRING && format[1] == 0;
177 : }
178 :
179 78863 : static inline bool IsBinary(const char *format)
180 : {
181 78863 : return format[0] == ARROW_LETTER_BINARY && format[1] == 0;
182 : }
183 :
184 12856 : static inline bool IsLargeBinary(const char *format)
185 : {
186 12856 : return format[0] == ARROW_LETTER_LARGE_BINARY && format[1] == 0;
187 : }
188 :
/** Return whether the Arrow format string starts with "ts", then the unit
 * letter chType, then ':'.
 */
static inline bool IsTimestampInternal(const char *format, char chType)
{
    if (format[0] != 't' || format[1] != 's')
        return false;
    return format[2] == chType && format[3] == ':';
}
194 :
195 3542 : static inline bool IsTimestampSeconds(const char *format)
196 : {
197 3542 : return IsTimestampInternal(format, 's');
198 : }
199 :
200 3532 : static inline bool IsTimestampMilliseconds(const char *format)
201 : {
202 3532 : return IsTimestampInternal(format, 'm');
203 : }
204 :
205 2387 : static inline bool IsTimestampMicroseconds(const char *format)
206 : {
207 2387 : return IsTimestampInternal(format, 'u');
208 : }
209 :
210 1835 : static inline bool IsTimestampNanoseconds(const char *format)
211 : {
212 1835 : return IsTimestampInternal(format, 'n');
213 : }
214 :
215 2784 : static inline bool IsTimestamp(const char *format)
216 : {
217 7260 : return IsTimestampSeconds(format) || IsTimestampMilliseconds(format) ||
218 7260 : IsTimestampMicroseconds(format) || IsTimestampNanoseconds(format);
219 : }
220 :
221 107 : static inline const char *GetTimestampTimezone(const char *format)
222 : {
223 107 : return IsTimestamp(format) ? format + strlen("tm?:") : "";
224 : }
225 :
226 : /************************************************************************/
227 : /* TestBit() */
228 : /************************************************************************/
229 :
/** Return whether bit nIdx is set in the bitmap pabyData.
 *
 * Bits are numbered from the least significant bit of each byte: bit nIdx
 * lives in byte nIdx / 8 at position nIdx % 8.
 */
inline bool TestBit(const uint8_t *pabyData, size_t nIdx)
{
    const uint8_t byValue = pabyData[nIdx >> 3];
    return ((byValue >> (nIdx & 7)) & 1) != 0;
}
234 :
235 : /************************************************************************/
236 : /* SetBit() */
237 : /************************************************************************/
238 :
/** Set bit nIdx in the bitmap pabyData.
 *
 * Bits are numbered from the least significant bit of each byte: bit nIdx
 * lives in byte nIdx / 8 at position nIdx % 8.
 */
inline void SetBit(uint8_t *pabyData, size_t nIdx)
{
    uint8_t &byTarget = pabyData[nIdx >> 3];
    const uint8_t byMask = static_cast<uint8_t>(1u << (nIdx & 7));
    byTarget = static_cast<uint8_t>(byTarget | byMask);
}
243 :
244 : /************************************************************************/
245 : /* UnsetBit() */
246 : /************************************************************************/
247 :
/** Clear bit nIdx in the bitmap pabyData.
 *
 * Bits are numbered from the least significant bit of each byte: bit nIdx
 * lives in byte nIdx / 8 at position nIdx % 8.
 */
inline void UnsetBit(uint8_t *pabyData, size_t nIdx)
{
    uint8_t &byTarget = pabyData[nIdx >> 3];
    const uint8_t byMask = static_cast<uint8_t>(1u << (nIdx & 7));
    byTarget = static_cast<uint8_t>(byTarget & ~byMask);
}
252 :
253 : /************************************************************************/
254 : /* DefaultReleaseSchema() */
255 : /************************************************************************/
256 :
257 25135 : static void OGRLayerReleaseSchema(struct ArrowSchema *schema,
258 : bool bFullFreeFormat)
259 : {
260 25135 : CPLAssert(schema->release != nullptr);
261 25135 : if (bFullFreeFormat || STARTS_WITH(schema->format, "w:") ||
262 25104 : STARTS_WITH(schema->format, "tsm:"))
263 : {
264 1032 : CPLFree(const_cast<char *>(schema->format));
265 : }
266 25135 : CPLFree(const_cast<char *>(schema->name));
267 25135 : CPLFree(const_cast<char *>(schema->metadata));
268 25135 : if (schema->children)
269 : {
270 25756 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
271 : {
272 22608 : if (schema->children[i] && schema->children[i]->release)
273 : {
274 22608 : schema->children[i]->release(schema->children[i]);
275 22608 : CPLFree(schema->children[i]);
276 : }
277 : }
278 3148 : CPLFree(schema->children);
279 : }
280 25135 : if (schema->dictionary)
281 : {
282 31 : if (schema->dictionary->release)
283 : {
284 31 : schema->dictionary->release(schema->dictionary);
285 31 : CPLFree(schema->dictionary);
286 : }
287 : }
288 25135 : schema->release = nullptr;
289 25135 : }
290 :
291 25112 : static void OGRLayerPartialReleaseSchema(struct ArrowSchema *schema)
292 : {
293 25112 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ false);
294 25112 : }
295 :
296 23 : static void OGRLayerFullReleaseSchema(struct ArrowSchema *schema)
297 : {
298 23 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ true);
299 23 : }
300 :
301 : /** Release a ArrowSchema.
302 : *
303 : * To be used by driver implementations that have a custom GetArrowStream()
304 : * implementation.
305 : *
306 : * @param schema Schema to release.
307 : * @since GDAL 3.6
308 : */
309 :
310 25081 : void OGRLayer::ReleaseSchema(struct ArrowSchema *schema)
311 : {
312 25081 : OGRLayerPartialReleaseSchema(schema);
313 25081 : }
314 :
315 : /************************************************************************/
316 : /* AddDictToSchema() */
317 : /************************************************************************/
318 :
319 31 : static void AddDictToSchema(struct ArrowSchema *psChild,
320 : const OGRCodedFieldDomain *poCodedDomain)
321 : {
322 31 : const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
323 31 : int nLastCode = -1;
324 31 : int nCountNull = 0;
325 31 : uint32_t nCountChars = 0;
326 108 : for (; psIter->pszCode; ++psIter)
327 : {
328 77 : if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
329 : {
330 0 : return;
331 : }
332 77 : int nCode = atoi(psIter->pszCode);
333 77 : if (nCode <= nLastCode || nCode - nLastCode > 100)
334 : {
335 0 : return;
336 : }
337 103 : for (int i = nLastCode + 1; i < nCode; ++i)
338 : {
339 26 : nCountNull++;
340 : }
341 77 : if (psIter->pszValue != nullptr)
342 : {
343 51 : const size_t nLen = strlen(psIter->pszValue);
344 51 : if (nLen > std::numeric_limits<uint32_t>::max() - nCountChars)
345 0 : return;
346 51 : nCountChars += static_cast<uint32_t>(nLen);
347 : }
348 : else
349 26 : nCountNull++;
350 77 : nLastCode = nCode;
351 : }
352 :
353 : auto psChildDict = static_cast<struct ArrowSchema *>(
354 31 : CPLCalloc(1, sizeof(struct ArrowSchema)));
355 31 : psChild->dictionary = psChildDict;
356 31 : psChildDict->release = OGRLayerPartialReleaseSchema;
357 31 : psChildDict->name = CPLStrdup(poCodedDomain->GetName().c_str());
358 31 : psChildDict->format = "u";
359 31 : if (nCountNull)
360 26 : psChildDict->flags = ARROW_FLAG_NULLABLE;
361 : }
362 :
363 : /************************************************************************/
364 : /* DefaultGetArrowSchema() */
365 : /************************************************************************/
366 :
367 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
368 : *
369 : * To be used by driver implementations that have a custom GetArrowStream()
370 : * implementation.
371 : *
372 : * @since GDAL 3.6
373 : */
374 2162 : int OGRLayer::GetArrowSchema(struct ArrowArrayStream *,
375 : struct ArrowSchema *out_schema)
376 : {
377 2162 : const bool bIncludeFID = CPLTestBool(
378 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
379 2162 : memset(out_schema, 0, sizeof(*out_schema));
380 2162 : out_schema->format = "+s";
381 2162 : out_schema->name = CPLStrdup("");
382 2162 : out_schema->metadata = nullptr;
383 2162 : auto poLayerDefn = GetLayerDefn();
384 2162 : const int nFieldCount = poLayerDefn->GetFieldCount();
385 2162 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
386 2162 : const int nChildren = 1 + nFieldCount + nGeomFieldCount;
387 :
388 2162 : out_schema->children = static_cast<struct ArrowSchema **>(
389 2162 : CPLCalloc(nChildren, sizeof(struct ArrowSchema *)));
390 2162 : int iSchemaChild = 0;
391 2162 : if (bIncludeFID)
392 : {
393 3842 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
394 1921 : CPLCalloc(1, sizeof(struct ArrowSchema)));
395 1921 : auto psChild = out_schema->children[iSchemaChild];
396 1921 : ++iSchemaChild;
397 1921 : psChild->release = OGRLayer::ReleaseSchema;
398 1921 : const char *pszFIDName = GetFIDColumn();
399 1921 : psChild->name =
400 1921 : CPLStrdup((pszFIDName && pszFIDName[0]) ? pszFIDName
401 : : DEFAULT_ARROW_FID_NAME);
402 1921 : psChild->format = "l";
403 : }
404 20018 : for (int i = 0; i < nFieldCount; ++i)
405 : {
406 17856 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
407 17856 : if (poFieldDefn->IsIgnored())
408 : {
409 40 : continue;
410 : }
411 :
412 35632 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
413 17816 : CPLCalloc(1, sizeof(struct ArrowSchema)));
414 17816 : auto psChild = out_schema->children[iSchemaChild];
415 17816 : ++iSchemaChild;
416 17816 : psChild->release = OGRLayer::ReleaseSchema;
417 17816 : psChild->name = CPLStrdup(poFieldDefn->GetNameRef());
418 17816 : if (poFieldDefn->IsNullable())
419 17042 : psChild->flags = ARROW_FLAG_NULLABLE;
420 17816 : const auto eType = poFieldDefn->GetType();
421 17816 : const auto eSubType = poFieldDefn->GetSubType();
422 17816 : const char *item_format = nullptr;
423 17816 : switch (eType)
424 : {
425 5848 : case OFTInteger:
426 : {
427 5848 : if (eSubType == OFSTBoolean)
428 285 : psChild->format = "b";
429 5563 : else if (eSubType == OFSTInt16)
430 672 : psChild->format = "s";
431 : else
432 4891 : psChild->format = "i";
433 :
434 5848 : const auto &osDomainName = poFieldDefn->GetDomainName();
435 5848 : if (!osDomainName.empty())
436 : {
437 31 : auto poDS = GetDataset();
438 31 : if (poDS)
439 : {
440 : const auto poFieldDomain =
441 31 : poDS->GetFieldDomain(osDomainName);
442 62 : if (poFieldDomain &&
443 31 : poFieldDomain->GetDomainType() == OFDT_CODED)
444 : {
445 31 : const OGRCodedFieldDomain *poCodedDomain =
446 : static_cast<const OGRCodedFieldDomain *>(
447 : poFieldDomain);
448 31 : AddDictToSchema(psChild, poCodedDomain);
449 : }
450 : }
451 : }
452 :
453 5848 : break;
454 : }
455 :
456 501 : case OFTInteger64:
457 501 : psChild->format = "l";
458 501 : break;
459 :
460 2836 : case OFTReal:
461 : {
462 2836 : if (eSubType == OFSTFloat32)
463 675 : psChild->format = "f";
464 : else
465 2161 : psChild->format = "g";
466 2836 : break;
467 : }
468 :
469 5119 : case OFTString:
470 : case OFTWideString:
471 5119 : psChild->format = "u";
472 5119 : break;
473 :
474 1206 : case OFTBinary:
475 : {
476 1206 : if (poFieldDefn->GetWidth() > 0)
477 8 : psChild->format =
478 8 : CPLStrdup(CPLSPrintf("w:%d", poFieldDefn->GetWidth()));
479 : else
480 1198 : psChild->format = "z";
481 1206 : break;
482 : }
483 :
484 371 : case OFTIntegerList:
485 : {
486 371 : if (eSubType == OFSTBoolean)
487 90 : item_format = "b";
488 281 : else if (eSubType == OFSTInt16)
489 67 : item_format = "s";
490 : else
491 214 : item_format = "i";
492 371 : break;
493 : }
494 :
495 92 : case OFTInteger64List:
496 92 : item_format = "l";
497 92 : break;
498 :
499 248 : case OFTRealList:
500 : {
501 248 : if (eSubType == OFSTFloat32)
502 82 : item_format = "f";
503 : else
504 166 : item_format = "g";
505 248 : break;
506 : }
507 :
508 270 : case OFTStringList:
509 : case OFTWideStringList:
510 270 : item_format = "u";
511 270 : break;
512 :
513 208 : case OFTDate:
514 208 : psChild->format = "tdD";
515 208 : break;
516 :
517 116 : case OFTTime:
518 116 : psChild->format = "ttm";
519 116 : break;
520 :
521 1001 : case OFTDateTime:
522 : {
523 1001 : const char *pszPrefix = "tsm:";
524 : const char *pszTZOverride =
525 1001 : m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
526 1001 : if (pszTZOverride && EQUAL(pszTZOverride, "unknown"))
527 : {
528 2 : psChild->format = CPLStrdup(pszPrefix);
529 : }
530 999 : else if (pszTZOverride)
531 : {
532 40 : psChild->format = CPLStrdup(
533 80 : (std::string(pszPrefix) + pszTZOverride).c_str());
534 : }
535 : else
536 : {
537 959 : const int nTZFlag = poFieldDefn->GetTZFlag();
538 959 : if (nTZFlag == OGR_TZFLAG_MIXED_TZ ||
539 : nTZFlag == OGR_TZFLAG_UTC)
540 : {
541 7 : psChild->format =
542 7 : CPLStrdup(CPLSPrintf("%sUTC", pszPrefix));
543 : }
544 952 : else if (nTZFlag == OGR_TZFLAG_UNKNOWN ||
545 : nTZFlag == OGR_TZFLAG_LOCALTIME)
546 : {
547 936 : psChild->format = CPLStrdup(pszPrefix);
548 : }
549 : else
550 : {
551 16 : psChild->format = CPLStrdup(
552 32 : (pszPrefix + OGRTZFlagToTimezone(nTZFlag, "UTC"))
553 : .c_str());
554 : }
555 : }
556 1001 : break;
557 : }
558 : }
559 :
560 17816 : if (item_format)
561 : {
562 981 : psChild->format = "+l";
563 981 : psChild->n_children = 1;
564 981 : psChild->children = static_cast<struct ArrowSchema **>(
565 981 : CPLCalloc(1, sizeof(struct ArrowSchema *)));
566 1962 : psChild->children[0] = static_cast<struct ArrowSchema *>(
567 981 : CPLCalloc(1, sizeof(struct ArrowSchema)));
568 981 : psChild->children[0]->release = OGRLayer::ReleaseSchema;
569 981 : psChild->children[0]->name = CPLStrdup("item");
570 981 : psChild->children[0]->format = item_format;
571 : }
572 :
573 35632 : std::vector<std::pair<std::string, std::string>> oMetadata;
574 17816 : const char *pszAlternativeName = poFieldDefn->GetAlternativeNameRef();
575 17816 : if (pszAlternativeName && pszAlternativeName[0])
576 : oMetadata.emplace_back(
577 262 : std::pair(MD_GDAL_OGR_ALTERNATIVE_NAME, pszAlternativeName));
578 :
579 17816 : const char *pszDefault = poFieldDefn->GetDefault();
580 17816 : if (pszDefault && pszDefault[0])
581 42 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DEFAULT, pszDefault));
582 :
583 17816 : const std::string &osComment = poFieldDefn->GetComment();
584 17816 : if (!osComment.empty())
585 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_COMMENT, osComment));
586 :
587 17816 : if (eType == OFTString && eSubType == OFSTJSON)
588 : {
589 111 : oMetadata.emplace_back(
590 111 : std::pair(ARROW_EXTENSION_NAME_KEY, EXTENSION_NAME_ARROW_JSON));
591 : }
592 17705 : else if (eSubType != OFSTNone && eSubType != OFSTBoolean &&
593 : eSubType != OFSTFloat32)
594 : {
595 0 : oMetadata.emplace_back(std::pair(
596 740 : MD_GDAL_OGR_SUBTYPE, OGR_GetFieldSubTypeName(eSubType)));
597 : }
598 17816 : if (eType == OFTString && poFieldDefn->GetWidth() > 0)
599 : {
600 0 : oMetadata.emplace_back(std::pair(
601 661 : MD_GDAL_OGR_WIDTH, CPLSPrintf("%d", poFieldDefn->GetWidth())));
602 : }
603 17816 : if (poFieldDefn->IsUnique())
604 : {
605 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_UNIQUE, "true"));
606 : }
607 17816 : if (!poFieldDefn->GetDomainName().empty())
608 : {
609 62 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DOMAIN_NAME,
610 62 : poFieldDefn->GetDomainName()));
611 : }
612 :
613 17816 : if (!oMetadata.empty())
614 : {
615 1837 : uint64_t nLen64 = sizeof(int32_t);
616 3704 : for (const auto &oPair : oMetadata)
617 : {
618 1867 : nLen64 += sizeof(int32_t);
619 1867 : nLen64 += oPair.first.size();
620 1867 : nLen64 += sizeof(int32_t);
621 1867 : nLen64 += oPair.second.size();
622 : }
623 1837 : if (nLen64 <
624 1837 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
625 : {
626 1837 : const size_t nLen = static_cast<size_t>(nLen64);
627 1837 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
628 1837 : psChild->metadata = pszMetadata;
629 1837 : size_t offsetMD = 0;
630 1837 : int32_t nSize = static_cast<int>(oMetadata.size());
631 1837 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
632 1837 : offsetMD += sizeof(int32_t);
633 3704 : for (const auto &oPair : oMetadata)
634 : {
635 1867 : nSize = static_cast<int32_t>(oPair.first.size());
636 1867 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
637 1867 : offsetMD += sizeof(int32_t);
638 1867 : memcpy(pszMetadata + offsetMD, oPair.first.data(),
639 : oPair.first.size());
640 1867 : offsetMD += oPair.first.size();
641 :
642 1867 : nSize = static_cast<int32_t>(oPair.second.size());
643 1867 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
644 1867 : offsetMD += sizeof(int32_t);
645 1867 : memcpy(pszMetadata + offsetMD, oPair.second.data(),
646 : oPair.second.size());
647 1867 : offsetMD += oPair.second.size();
648 : }
649 :
650 1837 : CPLAssert(offsetMD == nLen);
651 1837 : CPL_IGNORE_RET_VAL(offsetMD);
652 : }
653 : else
654 : {
655 : // Extremely unlikely !
656 0 : CPLError(CE_Warning, CPLE_AppDefined,
657 : "Cannot write ArrowSchema::metadata due to "
658 : "too large content");
659 : }
660 : }
661 : }
662 :
663 : const char *const pszGeometryMetadataEncoding =
664 2162 : m_aosArrowArrayStreamOptions.FetchNameValue(
665 : "GEOMETRY_METADATA_ENCODING");
666 2162 : const char *pszExtensionName = EXTENSION_NAME_OGC_WKB;
667 2162 : if (pszGeometryMetadataEncoding)
668 : {
669 4 : if (EQUAL(pszGeometryMetadataEncoding, "OGC"))
670 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
671 4 : else if (EQUAL(pszGeometryMetadataEncoding, "GEOARROW"))
672 4 : pszExtensionName = EXTENSION_NAME_GEOARROW_WKB;
673 : else
674 0 : CPLError(CE_Warning, CPLE_NotSupported,
675 : "Unsupported GEOMETRY_METADATA_ENCODING value: %s",
676 : pszGeometryMetadataEncoding);
677 : }
678 4049 : for (int i = 0; i < nGeomFieldCount; ++i)
679 : {
680 1887 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
681 1887 : if (poFieldDefn->IsIgnored())
682 : {
683 15 : continue;
684 : }
685 :
686 1872 : out_schema->children[iSchemaChild] = CreateSchemaForWKBGeometryColumn(
687 : poFieldDefn, "z", pszExtensionName);
688 :
689 1872 : ++iSchemaChild;
690 : }
691 :
692 2162 : out_schema->n_children = iSchemaChild;
693 2162 : out_schema->release = OGRLayer::ReleaseSchema;
694 2162 : return 0;
695 : }
696 :
697 : /************************************************************************/
698 : /* CreateSchemaForWKBGeometryColumn() */
699 : /************************************************************************/
700 :
701 : /** Return a ArrowSchema* corresponding to the WKB encoding of a geometry
702 : * column.
703 : */
704 :
705 : /* static */
706 : struct ArrowSchema *
707 2201 : OGRLayer::CreateSchemaForWKBGeometryColumn(const OGRGeomFieldDefn *poFieldDefn,
708 : const char *pszArrowFormat,
709 : const char *pszExtensionName)
710 : {
711 2201 : CPLAssert(strcmp(pszArrowFormat, "z") == 0 ||
712 : strcmp(pszArrowFormat, "Z") == 0);
713 2201 : if (!EQUAL(pszExtensionName, EXTENSION_NAME_OGC_WKB) &&
714 4 : !EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
715 : {
716 0 : CPLError(CE_Failure, CPLE_NotSupported,
717 : "Unsupported extension name '%s'. Defaulting to '%s'",
718 : pszExtensionName, EXTENSION_NAME_OGC_WKB);
719 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
720 : }
721 : auto psSchema = static_cast<struct ArrowSchema *>(
722 2201 : CPLCalloc(1, sizeof(struct ArrowSchema)));
723 2201 : psSchema->release = OGRLayer::ReleaseSchema;
724 2201 : const char *pszGeomFieldName = poFieldDefn->GetNameRef();
725 2201 : if (pszGeomFieldName[0] == '\0')
726 761 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
727 2201 : psSchema->name = CPLStrdup(pszGeomFieldName);
728 2201 : if (poFieldDefn->IsNullable())
729 2172 : psSchema->flags = ARROW_FLAG_NULLABLE;
730 2201 : psSchema->format = strcmp(pszArrowFormat, "z") == 0 ? "z" : "Z";
731 2201 : std::string osExtensionMetadata;
732 2201 : if (EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
733 : {
734 4 : const auto poSRS = poFieldDefn->GetSpatialRef();
735 4 : if (poSRS)
736 : {
737 2 : char *pszPROJJSON = nullptr;
738 2 : poSRS->exportToPROJJSON(&pszPROJJSON, nullptr);
739 2 : if (pszPROJJSON)
740 : {
741 2 : osExtensionMetadata = "{\"crs\":";
742 2 : osExtensionMetadata += pszPROJJSON;
743 2 : osExtensionMetadata += '}';
744 2 : CPLFree(pszPROJJSON);
745 : }
746 : else
747 : {
748 0 : CPLError(CE_Warning, CPLE_AppDefined,
749 : "Cannot export CRS of geometry field %s to PROJJSON",
750 : poFieldDefn->GetNameRef());
751 : }
752 : }
753 : }
754 2201 : size_t nLen = sizeof(int32_t) + sizeof(int32_t) +
755 : strlen(ARROW_EXTENSION_NAME_KEY) + sizeof(int32_t) +
756 2201 : strlen(pszExtensionName);
757 2201 : if (!osExtensionMetadata.empty())
758 : {
759 2 : nLen += sizeof(int32_t) + strlen(ARROW_EXTENSION_METADATA_KEY) +
760 2 : sizeof(int32_t) + osExtensionMetadata.size();
761 : }
762 2201 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
763 2201 : psSchema->metadata = pszMetadata;
764 2201 : size_t offsetMD = 0;
765 2201 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
766 2201 : osExtensionMetadata.empty() ? 1 : 2;
767 2201 : offsetMD += sizeof(int32_t);
768 2201 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
769 : static_cast<int32_t>(strlen(ARROW_EXTENSION_NAME_KEY));
770 2201 : offsetMD += sizeof(int32_t);
771 2201 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_NAME_KEY,
772 : strlen(ARROW_EXTENSION_NAME_KEY));
773 2201 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_NAME_KEY));
774 2201 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
775 2201 : static_cast<int32_t>(strlen(pszExtensionName));
776 2201 : offsetMD += sizeof(int32_t);
777 2201 : memcpy(pszMetadata + offsetMD, pszExtensionName, strlen(pszExtensionName));
778 2201 : offsetMD += strlen(pszExtensionName);
779 2201 : if (!osExtensionMetadata.empty())
780 : {
781 2 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
782 : static_cast<int32_t>(strlen(ARROW_EXTENSION_METADATA_KEY));
783 2 : offsetMD += sizeof(int32_t);
784 2 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_METADATA_KEY,
785 : strlen(ARROW_EXTENSION_METADATA_KEY));
786 2 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_METADATA_KEY));
787 2 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
788 2 : static_cast<int32_t>(osExtensionMetadata.size());
789 2 : offsetMD += sizeof(int32_t);
790 2 : memcpy(pszMetadata + offsetMD, osExtensionMetadata.c_str(),
791 : osExtensionMetadata.size());
792 2 : offsetMD += osExtensionMetadata.size();
793 : }
794 2201 : CPLAssert(offsetMD == nLen);
795 2201 : CPL_IGNORE_RET_VAL(offsetMD);
796 4402 : return psSchema;
797 : }
798 :
799 : /************************************************************************/
800 : /* StaticGetArrowSchema() */
801 : /************************************************************************/
802 :
803 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
804 : *
805 : * To be used by driver implementations that have a custom GetArrowStream()
806 : * implementation.
807 : *
808 : * @since GDAL 3.6
809 : */
810 2382 : int OGRLayer::StaticGetArrowSchema(struct ArrowArrayStream *stream,
811 : struct ArrowSchema *out_schema)
812 : {
813 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
814 2382 : stream->private_data)
815 2382 : ->poShared->m_poLayer;
816 2382 : if (poLayer == nullptr)
817 : {
818 1 : CPLError(CE_Failure, CPLE_NotSupported,
819 : "Calling get_schema() on a freed OGRLayer is not supported");
820 1 : return EINVAL;
821 : }
822 2381 : return poLayer->GetArrowSchema(stream, out_schema);
823 : }
824 :
825 : /************************************************************************/
826 : /* DefaultReleaseArray() */
827 : /************************************************************************/
828 :
829 34611 : static void OGRLayerDefaultReleaseArray(struct ArrowArray *array)
830 : {
831 34611 : if (array->buffers)
832 : {
833 108785 : for (int i = 0; i < static_cast<int>(array->n_buffers); ++i)
834 74174 : VSIFreeAligned(const_cast<void *>(array->buffers[i]));
835 34611 : CPLFree(array->buffers);
836 : }
837 34611 : if (array->children)
838 : {
839 40525 : for (int i = 0; i < static_cast<int>(array->n_children); ++i)
840 : {
841 32854 : if (array->children[i] && array->children[i]->release)
842 : {
843 32481 : array->children[i]->release(array->children[i]);
844 32481 : CPLFree(array->children[i]);
845 : }
846 : }
847 7671 : CPLFree(array->children);
848 : }
849 34611 : if (array->dictionary)
850 : {
851 147 : if (array->dictionary->release)
852 : {
853 147 : array->dictionary->release(array->dictionary);
854 147 : CPLFree(array->dictionary);
855 : }
856 : }
857 34611 : array->release = nullptr;
858 34611 : }
859 :
860 : /** Release a ArrowArray.
861 : *
862 : * To be used by driver implementations that have a custom GetArrowStream()
863 : * implementation.
864 : *
865 : * @param array Arrow array to release.
866 : * @since GDAL 3.6
867 : */
868 3858 : void OGRLayer::ReleaseArray(struct ArrowArray *array)
869 : {
870 3858 : OGRLayerDefaultReleaseArray(array);
871 3858 : }
872 :
873 : /************************************************************************/
874 : /* IsValidField() */
875 : /************************************************************************/
876 :
877 88607 : static inline bool IsValidField(const OGRField *psRawField)
878 : {
879 103735 : return (!(psRawField->Set.nMarker1 == OGRUnsetMarker &&
880 7564 : psRawField->Set.nMarker2 == OGRUnsetMarker &&
881 177214 : psRawField->Set.nMarker3 == OGRUnsetMarker) &&
882 81043 : !(psRawField->Set.nMarker1 == OGRNullMarker &&
883 3144 : psRawField->Set.nMarker2 == OGRNullMarker &&
884 91751 : psRawField->Set.nMarker3 == OGRNullMarker));
885 : }
886 :
887 : /************************************************************************/
888 : /* AllocValidityBitmap() */
889 : /************************************************************************/
890 :
891 3448 : static uint8_t *AllocValidityBitmap(size_t nSize)
892 : {
893 : auto pabyValidity = static_cast<uint8_t *>(
894 3448 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((1 + nSize + 7) / 8));
895 3448 : if (pabyValidity)
896 : {
897 : // All valid initially
898 3448 : memset(pabyValidity, 0xFF, (nSize + 7) / 8);
899 : }
900 3448 : return pabyValidity;
901 : }
902 :
903 : /************************************************************************/
904 : /* FillArray() */
905 : /************************************************************************/
906 :
907 : template <class T, typename TMember>
908 5835 : static bool FillArray(struct ArrowArray *psChild,
909 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
910 : const size_t nFeatureCountLimit, const bool bIsNullable,
911 : TMember member, const int i)
912 : {
913 5835 : psChild->n_buffers = 2;
914 5835 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
915 5835 : uint8_t *pabyValidity = nullptr;
916 : T *panValues = static_cast<T *>(
917 5835 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
918 5835 : if (panValues == nullptr)
919 0 : return false;
920 5835 : psChild->buffers[1] = panValues;
921 54191 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
922 : {
923 48356 : auto &poFeature = apoFeatures[iFeat];
924 48356 : const auto psRawField = poFeature->GetRawFieldRef(i);
925 48356 : if (IsValidField(psRawField))
926 : {
927 43455 : panValues[iFeat] = static_cast<T>((*psRawField).*member);
928 : }
929 4901 : else if (bIsNullable)
930 : {
931 4901 : panValues[iFeat] = 0;
932 4901 : ++psChild->null_count;
933 4901 : if (pabyValidity == nullptr)
934 : {
935 1218 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
936 1218 : psChild->buffers[0] = pabyValidity;
937 1218 : if (pabyValidity == nullptr)
938 0 : return false;
939 : }
940 4901 : UnsetBit(pabyValidity, iFeat);
941 : }
942 : else
943 : {
944 0 : panValues[iFeat] = 0;
945 : }
946 : }
947 5835 : return true;
948 : }
949 :
950 : /************************************************************************/
951 : /* FillBoolArray() */
952 : /************************************************************************/
953 :
954 : template <typename TMember>
955 137 : static bool FillBoolArray(struct ArrowArray *psChild,
956 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
957 : const size_t nFeatureCountLimit,
958 : const bool bIsNullable, TMember member, const int i)
959 : {
960 137 : psChild->n_buffers = 2;
961 137 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
962 137 : uint8_t *pabyValidity = nullptr;
963 : uint8_t *panValues = static_cast<uint8_t *>(
964 137 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 7 + 1) / 8));
965 137 : if (panValues == nullptr)
966 0 : return false;
967 137 : memset(panValues, 0, (nFeatureCountLimit + 7) / 8);
968 137 : psChild->buffers[1] = panValues;
969 595 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
970 : {
971 458 : auto &poFeature = apoFeatures[iFeat];
972 458 : const auto psRawField = poFeature->GetRawFieldRef(i);
973 458 : if (IsValidField(psRawField))
974 : {
975 401 : if ((*psRawField).*member)
976 79 : SetBit(panValues, iFeat);
977 : }
978 57 : else if (bIsNullable)
979 : {
980 57 : ++psChild->null_count;
981 57 : if (pabyValidity == nullptr)
982 : {
983 45 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
984 45 : psChild->buffers[0] = pabyValidity;
985 45 : if (pabyValidity == nullptr)
986 0 : return false;
987 : }
988 57 : UnsetBit(pabyValidity, iFeat);
989 : }
990 : }
991 137 : return true;
992 : }
993 :
994 : /************************************************************************/
995 : /* FillListArray() */
996 : /************************************************************************/
997 :
998 : struct GetFromIntegerList
999 : {
1000 447 : static inline int getCount(const OGRField *psRawField)
1001 : {
1002 447 : return psRawField->IntegerList.nCount;
1003 : }
1004 :
1005 222 : static inline const int *getValues(const OGRField *psRawField)
1006 : {
1007 222 : return psRawField->IntegerList.paList;
1008 : }
1009 : };
1010 :
1011 : struct GetFromInteger64List
1012 : {
1013 196 : static inline int getCount(const OGRField *psRawField)
1014 : {
1015 196 : return psRawField->Integer64List.nCount;
1016 : }
1017 :
1018 97 : static inline const GIntBig *getValues(const OGRField *psRawField)
1019 : {
1020 97 : return psRawField->Integer64List.paList;
1021 : }
1022 : };
1023 :
1024 : struct GetFromRealList
1025 : {
1026 304 : static inline int getCount(const OGRField *psRawField)
1027 : {
1028 304 : return psRawField->RealList.nCount;
1029 : }
1030 :
1031 151 : static inline const double *getValues(const OGRField *psRawField)
1032 : {
1033 151 : return psRawField->RealList.paList;
1034 : }
1035 : };
1036 :
1037 : template <class OffsetType, class T, class GetFromList>
1038 : static size_t
1039 393 : FillListArray(struct ArrowArray *psChild,
1040 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1041 : const size_t nFeatureCountLimit, const bool bIsNullable,
1042 : const int i, const size_t nMemLimit)
1043 : {
1044 393 : psChild->n_buffers = 2;
1045 393 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1046 393 : uint8_t *pabyValidity = nullptr;
1047 : OffsetType *panOffsets =
1048 393 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1049 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1050 393 : if (panOffsets == nullptr)
1051 0 : return 0;
1052 393 : psChild->buffers[1] = panOffsets;
1053 :
1054 393 : OffsetType nOffset = 0;
1055 393 : size_t nFeatCount = 0;
1056 1307 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1057 : {
1058 920 : panOffsets[iFeat] = nOffset;
1059 920 : auto &poFeature = apoFeatures[iFeat];
1060 920 : const auto psRawField = poFeature->GetRawFieldRef(i);
1061 920 : if (IsValidField(psRawField))
1062 : {
1063 426 : const unsigned nCount = GetFromList::getCount(psRawField);
1064 426 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1065 : {
1066 6 : if (nFeatCount == 0)
1067 3 : return 0;
1068 3 : break;
1069 : }
1070 420 : nOffset += static_cast<OffsetType>(nCount);
1071 : }
1072 494 : else if (bIsNullable)
1073 : {
1074 494 : ++psChild->null_count;
1075 494 : if (pabyValidity == nullptr)
1076 : {
1077 219 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1078 219 : psChild->buffers[0] = pabyValidity;
1079 219 : if (pabyValidity == nullptr)
1080 0 : return 0;
1081 : }
1082 494 : UnsetBit(pabyValidity, iFeat);
1083 : }
1084 : }
1085 390 : panOffsets[nFeatCount] = nOffset;
1086 :
1087 390 : psChild->n_children = 1;
1088 390 : psChild->children = static_cast<struct ArrowArray **>(
1089 390 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1090 780 : psChild->children[0] = static_cast<struct ArrowArray *>(
1091 390 : CPLCalloc(1, sizeof(struct ArrowArray)));
1092 390 : auto psValueChild = psChild->children[0];
1093 :
1094 390 : psValueChild->release = OGRLayerDefaultReleaseArray;
1095 390 : psValueChild->n_buffers = 2;
1096 390 : psValueChild->buffers =
1097 390 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1098 390 : psValueChild->length = nOffset;
1099 : T *panValues = static_cast<T *>(
1100 390 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (nOffset + 1)));
1101 390 : if (panValues == nullptr)
1102 0 : return 0;
1103 390 : psValueChild->buffers[1] = panValues;
1104 :
1105 390 : nOffset = 0;
1106 1304 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1107 : {
1108 914 : auto &poFeature = apoFeatures[iFeat];
1109 914 : const auto psRawField = poFeature->GetRawFieldRef(i);
1110 914 : if (IsValidField(psRawField))
1111 : {
1112 420 : const int nCount = GetFromList::getCount(psRawField);
1113 420 : const auto paList = GetFromList::getValues(psRawField);
1114 : if (sizeof(*paList) == sizeof(T))
1115 362 : memcpy(panValues + nOffset, paList, nCount * sizeof(T));
1116 : else
1117 : {
1118 176 : for (int j = 0; j < nCount; ++j)
1119 : {
1120 118 : panValues[nOffset + j] = static_cast<T>(paList[j]);
1121 : }
1122 : }
1123 420 : nOffset += static_cast<OffsetType>(nCount);
1124 : }
1125 : }
1126 :
1127 390 : return nFeatCount;
1128 : }
1129 :
1130 : template <class OffsetType, class GetFromList>
1131 : static size_t
1132 47 : FillListArrayBool(struct ArrowArray *psChild,
1133 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1134 : const size_t nFeatureCountLimit, const bool bIsNullable,
1135 : const int i, const size_t nMemLimit)
1136 : {
1137 47 : psChild->n_buffers = 2;
1138 47 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1139 47 : uint8_t *pabyValidity = nullptr;
1140 : OffsetType *panOffsets =
1141 47 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1142 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1143 47 : if (panOffsets == nullptr)
1144 0 : return 0;
1145 47 : psChild->buffers[1] = panOffsets;
1146 :
1147 47 : OffsetType nOffset = 0;
1148 47 : size_t nFeatCount = 0;
1149 126 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1150 : {
1151 81 : panOffsets[iFeat] = nOffset;
1152 81 : auto &poFeature = apoFeatures[iFeat];
1153 81 : const auto psRawField = poFeature->GetRawFieldRef(i);
1154 81 : if (IsValidField(psRawField))
1155 : {
1156 51 : const unsigned nCount = GetFromList::getCount(psRawField);
1157 51 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1158 : {
1159 2 : if (nFeatCount == 0)
1160 1 : return 0;
1161 1 : break;
1162 : }
1163 49 : nOffset += static_cast<OffsetType>(nCount);
1164 : }
1165 30 : else if (bIsNullable)
1166 : {
1167 30 : ++psChild->null_count;
1168 30 : if (pabyValidity == nullptr)
1169 : {
1170 26 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1171 26 : psChild->buffers[0] = pabyValidity;
1172 26 : if (pabyValidity == nullptr)
1173 0 : return 0;
1174 : }
1175 30 : UnsetBit(pabyValidity, iFeat);
1176 : }
1177 : }
1178 46 : panOffsets[nFeatCount] = nOffset;
1179 :
1180 46 : psChild->n_children = 1;
1181 46 : psChild->children = static_cast<struct ArrowArray **>(
1182 46 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1183 92 : psChild->children[0] = static_cast<struct ArrowArray *>(
1184 46 : CPLCalloc(1, sizeof(struct ArrowArray)));
1185 46 : auto psValueChild = psChild->children[0];
1186 :
1187 46 : psValueChild->release = OGRLayerDefaultReleaseArray;
1188 46 : psValueChild->n_buffers = 2;
1189 46 : psValueChild->buffers =
1190 46 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1191 46 : psValueChild->length = nOffset;
1192 : uint8_t *panValues = static_cast<uint8_t *>(
1193 46 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nOffset + 7 + 1) / 8));
1194 46 : if (panValues == nullptr)
1195 0 : return 0;
1196 46 : memset(panValues, 0, (nOffset + 7) / 8);
1197 46 : psValueChild->buffers[1] = panValues;
1198 :
1199 46 : nOffset = 0;
1200 126 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1201 : {
1202 80 : auto &poFeature = apoFeatures[iFeat];
1203 80 : const auto psRawField = poFeature->GetRawFieldRef(i);
1204 80 : if (IsValidField(psRawField))
1205 : {
1206 50 : const int nCount = GetFromList::getCount(psRawField);
1207 50 : const auto paList = GetFromList::getValues(psRawField);
1208 :
1209 346 : for (int j = 0; j < nCount; ++j)
1210 : {
1211 296 : if (paList[j])
1212 47 : SetBit(panValues, nOffset + j);
1213 : }
1214 50 : nOffset += static_cast<OffsetType>(nCount);
1215 : }
1216 : }
1217 :
1218 46 : return nFeatCount;
1219 : }
1220 :
1221 : /************************************************************************/
1222 : /* FillStringArray() */
1223 : /************************************************************************/
1224 :
1225 : template <class T>
1226 : static size_t
1227 3761 : FillStringArray(struct ArrowArray *psChild,
1228 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1229 : const size_t nFeatureCountLimit, const bool bIsNullable,
1230 : const int i, const size_t nMemLimit)
1231 : {
1232 3761 : psChild->n_buffers = 3;
1233 3761 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1234 3761 : uint8_t *pabyValidity = nullptr;
1235 : T *panOffsets = static_cast<T *>(
1236 3761 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1237 3761 : if (panOffsets == nullptr)
1238 0 : return 0;
1239 3761 : psChild->buffers[1] = panOffsets;
1240 :
1241 3761 : size_t nOffset = 0;
1242 3761 : size_t nFeatCount = 0;
1243 33980 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1244 : {
1245 30239 : panOffsets[iFeat] = static_cast<T>(nOffset);
1246 30239 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1247 30239 : if (IsValidField(psRawField))
1248 : {
1249 26919 : const size_t nLen = strlen(psRawField->String);
1250 26919 : if (nLen > nMemLimit - nOffset)
1251 : {
1252 20 : if (nFeatCount == 0)
1253 19 : return 0;
1254 1 : break;
1255 : }
1256 26899 : nOffset += static_cast<T>(nLen);
1257 : }
1258 3320 : else if (bIsNullable)
1259 : {
1260 3320 : ++psChild->null_count;
1261 3320 : if (pabyValidity == nullptr)
1262 : {
1263 1111 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1264 1111 : psChild->buffers[0] = pabyValidity;
1265 1111 : if (pabyValidity == nullptr)
1266 0 : return 0;
1267 : }
1268 3320 : UnsetBit(pabyValidity, iFeat);
1269 : }
1270 : }
1271 3742 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1272 :
1273 : char *pachValues =
1274 3742 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1275 3742 : if (pachValues == nullptr)
1276 0 : return 0;
1277 3742 : psChild->buffers[2] = pachValues;
1278 :
1279 3742 : nOffset = 0;
1280 33961 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1281 : {
1282 30219 : const size_t nLen =
1283 30219 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1284 30219 : if (nLen)
1285 : {
1286 25181 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1287 25181 : memcpy(pachValues + nOffset, psRawField->String, nLen);
1288 25181 : nOffset += nLen;
1289 : }
1290 : }
1291 :
1292 3742 : return nFeatCount;
1293 : }
1294 :
1295 : /************************************************************************/
1296 : /* FillStringListArray() */
1297 : /************************************************************************/
1298 :
1299 : template <class OffsetType>
1300 : static size_t
1301 200 : FillStringListArray(struct ArrowArray *psChild,
1302 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1303 : const size_t nFeatureCountLimit, const bool bIsNullable,
1304 : const int i, const size_t nMemLimit)
1305 : {
1306 200 : psChild->n_buffers = 2;
1307 200 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1308 200 : uint8_t *pabyValidity = nullptr;
1309 : OffsetType *panOffsets =
1310 200 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1311 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1312 200 : if (panOffsets == nullptr)
1313 0 : return false;
1314 200 : psChild->buffers[1] = panOffsets;
1315 :
1316 200 : OffsetType nStrings = 0;
1317 200 : OffsetType nCountChars = 0;
1318 200 : size_t nFeatCount = 0;
1319 498 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1320 : {
1321 300 : panOffsets[iFeat] = nStrings;
1322 300 : auto &poFeature = apoFeatures[iFeat];
1323 300 : const auto psRawField = poFeature->GetRawFieldRef(i);
1324 300 : if (IsValidField(psRawField))
1325 : {
1326 95 : const int nCount = psRawField->StringList.nCount;
1327 95 : if (static_cast<size_t>(nCount) >
1328 95 : static_cast<size_t>(nMemLimit - nStrings))
1329 : {
1330 0 : if (nFeatCount == 0)
1331 0 : return 0;
1332 0 : goto after_loop;
1333 : }
1334 247 : for (int j = 0; j < nCount; ++j)
1335 : {
1336 154 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1337 154 : if (nLen > static_cast<size_t>(nMemLimit - nCountChars))
1338 : {
1339 2 : if (nFeatCount == 0)
1340 1 : return 0;
1341 1 : goto after_loop;
1342 : }
1343 152 : nCountChars += static_cast<OffsetType>(nLen);
1344 : }
1345 93 : nStrings += static_cast<OffsetType>(nCount);
1346 : }
1347 205 : else if (bIsNullable)
1348 : {
1349 205 : ++psChild->null_count;
1350 205 : if (pabyValidity == nullptr)
1351 : {
1352 150 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1353 150 : psChild->buffers[0] = pabyValidity;
1354 150 : if (pabyValidity == nullptr)
1355 0 : return 0;
1356 : }
1357 205 : UnsetBit(pabyValidity, iFeat);
1358 : }
1359 : }
1360 198 : after_loop:
1361 199 : panOffsets[nFeatCount] = nStrings;
1362 :
1363 199 : psChild->n_children = 1;
1364 199 : psChild->children = static_cast<struct ArrowArray **>(
1365 199 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1366 398 : psChild->children[0] = static_cast<struct ArrowArray *>(
1367 199 : CPLCalloc(1, sizeof(struct ArrowArray)));
1368 199 : auto psValueChild = psChild->children[0];
1369 :
1370 199 : psValueChild->release = OGRLayerDefaultReleaseArray;
1371 199 : psValueChild->length = nStrings;
1372 199 : psValueChild->n_buffers = 3;
1373 199 : psValueChild->buffers =
1374 199 : static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1375 :
1376 : OffsetType *panChildOffsets = static_cast<OffsetType *>(
1377 199 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(OffsetType) * (1 + nStrings)));
1378 199 : if (panChildOffsets == nullptr)
1379 0 : return 0;
1380 199 : psValueChild->buffers[1] = panChildOffsets;
1381 :
1382 : char *pachValues =
1383 199 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars + 1));
1384 199 : if (pachValues == nullptr)
1385 0 : return 0;
1386 199 : psValueChild->buffers[2] = pachValues;
1387 :
1388 199 : nStrings = 0;
1389 199 : nCountChars = 0;
1390 497 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1391 : {
1392 298 : auto &poFeature = apoFeatures[iFeat];
1393 298 : const auto psRawField = poFeature->GetRawFieldRef(i);
1394 298 : if (IsValidField(psRawField))
1395 : {
1396 93 : const int nCount = psRawField->StringList.nCount;
1397 245 : for (int j = 0; j < nCount; ++j)
1398 : {
1399 152 : panChildOffsets[nStrings] = nCountChars;
1400 152 : ++nStrings;
1401 152 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1402 152 : memcpy(pachValues + nCountChars,
1403 152 : psRawField->StringList.paList[j], nLen);
1404 152 : nCountChars += static_cast<OffsetType>(nLen);
1405 : }
1406 : }
1407 : }
1408 199 : panChildOffsets[nStrings] = nCountChars;
1409 :
1410 199 : return nFeatCount;
1411 : }
1412 :
1413 : /************************************************************************/
1414 : /* FillBinaryArray() */
1415 : /************************************************************************/
1416 :
1417 : template <class T>
1418 : static size_t
1419 903 : FillBinaryArray(struct ArrowArray *psChild,
1420 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1421 : const size_t nFeatureCountLimit, const bool bIsNullable,
1422 : const int i, const size_t nMemLimit)
1423 : {
1424 903 : psChild->n_buffers = 3;
1425 903 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1426 903 : uint8_t *pabyValidity = nullptr;
1427 : T *panOffsets = static_cast<T *>(
1428 903 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1429 903 : if (panOffsets == nullptr)
1430 0 : return 0;
1431 903 : psChild->buffers[1] = panOffsets;
1432 :
1433 903 : T nOffset = 0;
1434 903 : size_t nFeatCount = 0;
1435 4350 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1436 : {
1437 3449 : panOffsets[iFeat] = nOffset;
1438 3449 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1439 3449 : if (IsValidField(psRawField))
1440 : {
1441 3392 : const size_t nLen = psRawField->Binary.nCount;
1442 3392 : if (nLen > static_cast<size_t>(nMemLimit - nOffset))
1443 : {
1444 2 : if (iFeat == 0)
1445 1 : return 0;
1446 1 : break;
1447 : }
1448 3390 : nOffset += static_cast<T>(nLen);
1449 : }
1450 57 : else if (bIsNullable)
1451 : {
1452 57 : ++psChild->null_count;
1453 57 : if (pabyValidity == nullptr)
1454 : {
1455 49 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1456 49 : psChild->buffers[0] = pabyValidity;
1457 49 : if (pabyValidity == nullptr)
1458 0 : return 0;
1459 : }
1460 57 : UnsetBit(pabyValidity, iFeat);
1461 : }
1462 : }
1463 902 : panOffsets[nFeatCount] = nOffset;
1464 :
1465 : GByte *pabyValues =
1466 902 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1467 902 : if (pabyValues == nullptr)
1468 0 : return 0;
1469 902 : psChild->buffers[2] = pabyValues;
1470 :
1471 902 : nOffset = 0;
1472 4349 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1473 : {
1474 3447 : const size_t nLen =
1475 3447 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1476 3447 : if (nLen)
1477 : {
1478 3390 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1479 3390 : memcpy(pabyValues + nOffset, psRawField->Binary.paData, nLen);
1480 3390 : nOffset += static_cast<T>(nLen);
1481 : }
1482 : }
1483 :
1484 902 : return nFeatCount;
1485 : }
1486 :
1487 : /************************************************************************/
1488 : /* FillFixedWidthBinaryArray() */
1489 : /************************************************************************/
1490 :
1491 : static bool
1492 7 : FillFixedWidthBinaryArray(struct ArrowArray *psChild,
1493 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1494 : const size_t nFeatureCountLimit,
1495 : const bool bIsNullable, const int nWidth, const int i)
1496 : {
1497 7 : psChild->n_buffers = 2;
1498 7 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1499 7 : uint8_t *pabyValidity = nullptr;
1500 :
1501 7 : assert(nFeatureCountLimit + 1 <=
1502 : std::numeric_limits<size_t>::max() / nWidth);
1503 : GByte *pabyValues = static_cast<GByte *>(
1504 7 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 1) * nWidth));
1505 7 : if (pabyValues == nullptr)
1506 0 : return false;
1507 7 : psChild->buffers[1] = pabyValues;
1508 :
1509 23 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1510 : {
1511 16 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1512 16 : if (IsValidField(psRawField))
1513 : {
1514 15 : const auto nLen = psRawField->Binary.nCount;
1515 15 : if (nLen < nWidth)
1516 : {
1517 0 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1518 : nLen);
1519 0 : memset(pabyValues + iFeat * nWidth + nLen, 0, nWidth - nLen);
1520 : }
1521 : else
1522 : {
1523 15 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1524 : nWidth);
1525 : }
1526 : }
1527 : else
1528 : {
1529 1 : memset(pabyValues + iFeat * nWidth, 0, nWidth);
1530 1 : if (bIsNullable)
1531 : {
1532 1 : ++psChild->null_count;
1533 1 : if (pabyValidity == nullptr)
1534 : {
1535 1 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1536 1 : psChild->buffers[0] = pabyValidity;
1537 1 : if (pabyValidity == nullptr)
1538 0 : return false;
1539 : }
1540 1 : UnsetBit(pabyValidity, iFeat);
1541 : }
1542 : }
1543 : }
1544 :
1545 7 : return true;
1546 : }
1547 :
1548 : /************************************************************************/
1549 : /* FillWKBGeometryArray() */
1550 : /************************************************************************/
1551 :
1552 : template <class T>
1553 : static size_t
1554 1224 : FillWKBGeometryArray(struct ArrowArray *psChild,
1555 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1556 : const size_t nFeatureCountLimit,
1557 : const OGRGeomFieldDefn *poFieldDefn, const int i,
1558 : const size_t nMemLimit)
1559 : {
1560 1224 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
1561 1224 : psChild->n_buffers = 3;
1562 1224 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1563 1224 : uint8_t *pabyValidity = nullptr;
1564 : T *panOffsets = static_cast<T *>(
1565 1224 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1566 1224 : if (panOffsets == nullptr)
1567 0 : return 0;
1568 1224 : psChild->buffers[1] = panOffsets;
1569 1224 : const auto eGeomType = poFieldDefn->GetType();
1570 3672 : auto poEmptyGeom =
1571 : std::unique_ptr<OGRGeometry>(OGRGeometryFactory::createGeometry(
1572 1224 : (eGeomType == wkbNone || wkbFlatten(eGeomType) == wkbUnknown)
1573 : ? wkbGeometryCollection
1574 : : eGeomType));
1575 :
1576 1224 : size_t nOffset = 0;
1577 1224 : size_t nFeatCount = 0;
1578 14256 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1579 : {
1580 13033 : panOffsets[iFeat] = static_cast<T>(nOffset);
1581 13033 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1582 13033 : if (poGeom != nullptr)
1583 : {
1584 12485 : const size_t nLen = poGeom->WkbSize();
1585 12485 : if (nLen > nMemLimit - nOffset)
1586 : {
1587 1 : if (nFeatCount == 0)
1588 0 : return 0;
1589 1 : break;
1590 : }
1591 12484 : nOffset += static_cast<T>(nLen);
1592 : }
1593 548 : else if (bIsNullable)
1594 : {
1595 548 : ++psChild->null_count;
1596 548 : if (pabyValidity == nullptr)
1597 : {
1598 269 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1599 269 : psChild->buffers[0] = pabyValidity;
1600 269 : if (pabyValidity == nullptr)
1601 0 : return 0;
1602 : }
1603 548 : UnsetBit(pabyValidity, iFeat);
1604 : }
1605 0 : else if (poEmptyGeom)
1606 : {
1607 0 : const size_t nLen = poEmptyGeom->WkbSize();
1608 0 : if (nLen > nMemLimit - nOffset)
1609 : {
1610 0 : if (nFeatCount == 0)
1611 0 : return 0;
1612 0 : break;
1613 : }
1614 0 : nOffset += static_cast<T>(nLen);
1615 : }
1616 : }
1617 1224 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1618 :
1619 : GByte *pabyValues =
1620 1224 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1621 1224 : if (pabyValues == nullptr)
1622 0 : return 0;
1623 1224 : psChild->buffers[2] = pabyValues;
1624 :
1625 1224 : nOffset = 0;
1626 14256 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1627 : {
1628 13032 : const size_t nLen =
1629 13032 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1630 13032 : if (nLen)
1631 : {
1632 12484 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1633 12484 : poGeom->exportToWkb(wkbNDR, pabyValues + nOffset, wkbVariantIso);
1634 12484 : nOffset += nLen;
1635 : }
1636 548 : else if (!bIsNullable && poEmptyGeom)
1637 : {
1638 0 : poEmptyGeom->exportToWkb(wkbNDR, pabyValues + nOffset,
1639 : wkbVariantIso);
1640 0 : nOffset += nLen;
1641 : }
1642 : }
1643 :
1644 1224 : return nFeatCount;
1645 : }
1646 :
1647 : /************************************************************************/
1648 : /* FillDateArray() */
1649 : /************************************************************************/
1650 :
1651 123 : static bool FillDateArray(struct ArrowArray *psChild,
1652 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1653 : const size_t nFeatureCountLimit,
1654 : const bool bIsNullable, const int i)
1655 : {
1656 123 : psChild->n_buffers = 2;
1657 123 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1658 123 : uint8_t *pabyValidity = nullptr;
1659 123 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1660 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1661 123 : if (panValues == nullptr)
1662 0 : return false;
1663 123 : psChild->buffers[1] = panValues;
1664 463 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1665 : {
1666 340 : auto &poFeature = apoFeatures[iFeat];
1667 340 : const auto psRawField = poFeature->GetRawFieldRef(i);
1668 340 : if (IsValidField(psRawField))
1669 : {
1670 : struct tm brokenDown;
1671 252 : memset(&brokenDown, 0, sizeof(brokenDown));
1672 252 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1673 252 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1674 252 : brokenDown.tm_mday = psRawField->Date.Day;
1675 252 : panValues[iFeat] =
1676 252 : static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
1677 : }
1678 88 : else if (bIsNullable)
1679 : {
1680 88 : panValues[iFeat] = 0;
1681 88 : ++psChild->null_count;
1682 88 : if (pabyValidity == nullptr)
1683 : {
1684 61 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1685 61 : psChild->buffers[0] = pabyValidity;
1686 61 : if (pabyValidity == nullptr)
1687 0 : return false;
1688 : }
1689 88 : UnsetBit(pabyValidity, iFeat);
1690 : }
1691 : else
1692 : {
1693 0 : panValues[iFeat] = 0;
1694 : }
1695 : }
1696 123 : return true;
1697 : }
1698 :
1699 : /************************************************************************/
1700 : /* FillTimeArray() */
1701 : /************************************************************************/
1702 :
1703 70 : static bool FillTimeArray(struct ArrowArray *psChild,
1704 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1705 : const size_t nFeatureCountLimit,
1706 : const bool bIsNullable, const int i)
1707 : {
1708 70 : psChild->n_buffers = 2;
1709 70 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1710 70 : uint8_t *pabyValidity = nullptr;
1711 70 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1712 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1713 70 : if (panValues == nullptr)
1714 0 : return false;
1715 70 : psChild->buffers[1] = panValues;
1716 655 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1717 : {
1718 585 : auto &poFeature = apoFeatures[iFeat];
1719 585 : const auto psRawField = poFeature->GetRawFieldRef(i);
1720 585 : if (IsValidField(psRawField))
1721 : {
1722 539 : panValues[iFeat] =
1723 539 : psRawField->Date.Hour * 3600000 +
1724 539 : psRawField->Date.Minute * 60000 +
1725 539 : static_cast<int>(psRawField->Date.Second * 1000 + 0.5);
1726 : }
1727 46 : else if (bIsNullable)
1728 : {
1729 46 : panValues[iFeat] = 0;
1730 46 : ++psChild->null_count;
1731 46 : if (pabyValidity == nullptr)
1732 : {
1733 38 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1734 38 : psChild->buffers[0] = pabyValidity;
1735 38 : if (pabyValidity == nullptr)
1736 0 : return false;
1737 : }
1738 46 : UnsetBit(pabyValidity, iFeat);
1739 : }
1740 : else
1741 : {
1742 0 : panValues[iFeat] = 0;
1743 : }
1744 : }
1745 70 : return true;
1746 : }
1747 :
1748 : /************************************************************************/
1749 : /* FillDateTimeArray() */
1750 : /************************************************************************/
1751 :
1752 : static bool
1753 712 : FillDateTimeArray(struct ArrowArray *psChild,
1754 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1755 : const size_t nFeatureCountLimit, const bool bIsNullable,
1756 : const int i, int nFieldTZFlag)
1757 : {
1758 712 : psChild->n_buffers = 2;
1759 712 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1760 712 : uint8_t *pabyValidity = nullptr;
1761 712 : int64_t *panValues = static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1762 : sizeof(int64_t) * (nFeatureCountLimit + 1)));
1763 712 : if (panValues == nullptr)
1764 0 : return false;
1765 712 : psChild->buffers[1] = panValues;
1766 : struct tm brokenDown;
1767 712 : memset(&brokenDown, 0, sizeof(brokenDown));
1768 3141 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1769 : {
1770 2429 : auto &poFeature = apoFeatures[iFeat];
1771 2429 : const auto psRawField = poFeature->GetRawFieldRef(i);
1772 2429 : if (IsValidField(psRawField))
1773 : {
1774 1670 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1775 1670 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1776 1670 : brokenDown.tm_mday = psRawField->Date.Day;
1777 1670 : brokenDown.tm_hour = psRawField->Date.Hour;
1778 1670 : brokenDown.tm_min = psRawField->Date.Minute;
1779 1670 : brokenDown.tm_sec = static_cast<int>(psRawField->Date.Second);
1780 : auto nVal =
1781 1670 : CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
1782 1670 : (static_cast<int>(psRawField->Date.Second * 1000 + 0.5) % 1000);
1783 1670 : if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
1784 65 : psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1785 : {
1786 : // Convert for psRawField->Date.TZFlag to UTC
1787 65 : const int TZOffset =
1788 65 : (psRawField->Date.TZFlag - OGR_TZFLAG_UTC) * 15;
1789 65 : const int TZOffsetMS = TZOffset * 60 * 1000;
1790 65 : nVal -= TZOffsetMS;
1791 : }
1792 1670 : panValues[iFeat] = nVal;
1793 : }
1794 759 : else if (bIsNullable)
1795 : {
1796 759 : panValues[iFeat] = 0;
1797 759 : ++psChild->null_count;
1798 759 : if (pabyValidity == nullptr)
1799 : {
1800 261 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1801 261 : psChild->buffers[0] = pabyValidity;
1802 261 : if (pabyValidity == nullptr)
1803 0 : return false;
1804 : }
1805 759 : UnsetBit(pabyValidity, iFeat);
1806 : }
1807 : else
1808 : {
1809 0 : panValues[iFeat] = 0;
1810 : }
1811 : }
1812 712 : return true;
1813 : }
1814 :
1815 : /************************************************************************/
1816 : /* GetNextArrowArray() */
1817 : /************************************************************************/
1818 :
1819 : /** Default implementation of the ArrowArrayStream::get_next() callback.
1820 : *
1821 : * To be used by driver implementations that have a custom GetArrowStream()
1822 : * implementation.
1823 : *
1824 : * @since GDAL 3.6
1825 : */
1826 3546 : int OGRLayer::GetNextArrowArray(struct ArrowArrayStream *stream,
1827 : struct ArrowArray *out_array)
1828 : {
1829 3546 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
1830 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
1831 : stream->private_data);
1832 :
1833 3546 : const bool bIncludeFID = CPLTestBool(
1834 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
1835 3546 : int nMaxBatchSize = atoi(m_aosArrowArrayStreamOptions.FetchNameValueDef(
1836 : "MAX_FEATURES_IN_BATCH", "65536"));
1837 3546 : if (nMaxBatchSize <= 0)
1838 0 : nMaxBatchSize = 1;
1839 3546 : if (nMaxBatchSize > INT_MAX - 1)
1840 0 : nMaxBatchSize = INT_MAX - 1;
1841 :
1842 : auto &oFeatureQueue =
1843 3546 : m_poSharedArrowArrayStreamPrivateData->m_oFeatureQueue;
1844 :
1845 3546 : memset(out_array, 0, sizeof(*out_array));
1846 :
1847 3546 : auto poLayerDefn = GetLayerDefn();
1848 3546 : const int nFieldCount = poLayerDefn->GetFieldCount();
1849 3546 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
1850 3546 : const int nMaxChildren =
1851 3546 : (bIncludeFID ? 1 : 0) + nFieldCount + nGeomFieldCount;
1852 3546 : int iSchemaChild = 0;
1853 :
1854 3546 : if (!m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.empty())
1855 : {
1856 6 : if (poPrivate->poShared->m_bEOF)
1857 : {
1858 2 : return 0;
1859 : }
1860 4 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS == 0)
1861 : {
1862 4 : CPLDebug("OGR", "Using fast FID filtering");
1863 : }
1864 8 : while (
1865 24 : oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize) &&
1866 12 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS <
1867 12 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
1868 : {
1869 : const auto nFID =
1870 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
1871 8 : [m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS];
1872 16 : auto poFeature = std::unique_ptr<OGRFeature>(GetFeature(nFID));
1873 8 : ++m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS;
1874 8 : if (poFeature && (m_poFilterGeom == nullptr ||
1875 0 : FilterGeometry(poFeature->GetGeomFieldRef(
1876 8 : m_iGeomFieldFilter))))
1877 : {
1878 4 : oFeatureQueue.emplace_back(std::move(poFeature));
1879 : }
1880 : }
1881 8 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS ==
1882 4 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
1883 : {
1884 4 : poPrivate->poShared->m_bEOF = true;
1885 : }
1886 : }
1887 3540 : else if (!poPrivate->poShared->m_bEOF)
1888 : {
1889 18779 : while (oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize))
1890 : {
1891 18776 : auto poFeature = std::unique_ptr<OGRFeature>(GetNextFeature());
1892 18776 : if (!poFeature)
1893 : {
1894 1825 : poPrivate->poShared->m_bEOF = true;
1895 1825 : break;
1896 : }
1897 16951 : oFeatureQueue.emplace_back(std::move(poFeature));
1898 : }
1899 : }
1900 3544 : if (oFeatureQueue.empty())
1901 : {
1902 2101 : return 0;
1903 : }
1904 :
1905 1443 : out_array->release = OGRLayerDefaultReleaseArray;
1906 1443 : out_array->null_count = 0;
1907 :
1908 1443 : out_array->n_children = nMaxChildren;
1909 1443 : out_array->children = static_cast<struct ArrowArray **>(
1910 1443 : CPLCalloc(nMaxChildren, sizeof(struct ArrowArray *)));
1911 1443 : out_array->release = OGRLayerDefaultReleaseArray;
1912 1443 : out_array->n_buffers = 1;
1913 1443 : out_array->buffers =
1914 1443 : static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
1915 :
1916 1443 : size_t nFeatureCount = oFeatureQueue.size();
1917 1443 : const uint32_t nMemLimit = OGRArrowArrayHelper::GetMemLimit();
1918 1443 : if (bIncludeFID)
1919 : {
1920 2632 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
1921 1316 : CPLCalloc(1, sizeof(struct ArrowArray)));
1922 1316 : auto psChild = out_array->children[iSchemaChild];
1923 1316 : ++iSchemaChild;
1924 1316 : psChild->release = OGRLayerDefaultReleaseArray;
1925 1316 : psChild->n_buffers = 2;
1926 1316 : psChild->buffers =
1927 1316 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1928 : int64_t *panValues =
1929 1316 : static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1930 : sizeof(int64_t) * (oFeatureQueue.size() + 1)));
1931 1316 : if (panValues == nullptr)
1932 0 : goto error;
1933 1316 : psChild->buffers[1] = panValues;
1934 17842 : for (size_t iFeat = 0; iFeat < oFeatureQueue.size(); ++iFeat)
1935 : {
1936 16526 : panValues[iFeat] = oFeatureQueue[iFeat]->GetFID();
1937 : }
1938 : }
1939 :
1940 13619 : for (int i = 0; i < nFieldCount; ++i)
1941 : {
1942 12201 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
1943 12201 : if (poFieldDefn->IsIgnored())
1944 : {
1945 13 : continue;
1946 : }
1947 :
1948 24376 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
1949 12188 : CPLCalloc(1, sizeof(struct ArrowArray)));
1950 12188 : auto psChild = out_array->children[iSchemaChild];
1951 12188 : ++iSchemaChild;
1952 12188 : psChild->release = OGRLayerDefaultReleaseArray;
1953 12188 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
1954 12188 : const auto eSubType = poFieldDefn->GetSubType();
1955 12188 : switch (poFieldDefn->GetType())
1956 : {
1957 3623 : case OFTInteger:
1958 : {
1959 3623 : if (eSubType == OFSTBoolean)
1960 : {
1961 137 : if (!FillBoolArray(psChild, oFeatureQueue, nFeatureCount,
1962 : bIsNullable, &OGRField::Integer, i))
1963 0 : goto error;
1964 : }
1965 3486 : else if (eSubType == OFSTInt16)
1966 : {
1967 477 : if (!FillArray<int16_t>(psChild, oFeatureQueue,
1968 : nFeatureCount, bIsNullable,
1969 : &OGRField::Integer, i))
1970 0 : goto error;
1971 : }
1972 : else
1973 : {
1974 3009 : if (!FillArray<int32_t>(psChild, oFeatureQueue,
1975 : nFeatureCount, bIsNullable,
1976 : &OGRField::Integer, i))
1977 0 : goto error;
1978 : }
1979 :
1980 3623 : const auto &osDomainName = poFieldDefn->GetDomainName();
1981 3623 : if (!osDomainName.empty())
1982 : {
1983 12 : auto poDS = GetDataset();
1984 12 : if (poDS)
1985 : {
1986 : const auto poFieldDomain =
1987 12 : poDS->GetFieldDomain(osDomainName);
1988 24 : if (poFieldDomain &&
1989 12 : poFieldDomain->GetDomainType() == OFDT_CODED)
1990 : {
1991 12 : const OGRCodedFieldDomain *poCodedDomain =
1992 : static_cast<const OGRCodedFieldDomain *>(
1993 : poFieldDomain);
1994 12 : OGRArrowArrayHelper::FillDict(psChild,
1995 : poCodedDomain);
1996 : }
1997 : }
1998 : }
1999 :
2000 3623 : break;
2001 : }
2002 :
2003 292 : case OFTInteger64:
2004 : {
2005 292 : if (!FillArray<int64_t>(psChild, oFeatureQueue, nFeatureCount,
2006 : bIsNullable, &OGRField::Integer64, i))
2007 0 : goto error;
2008 292 : break;
2009 : }
2010 :
2011 2057 : case OFTReal:
2012 : {
2013 2057 : if (eSubType == OFSTFloat32)
2014 : {
2015 477 : if (!FillArray<float>(psChild, oFeatureQueue, nFeatureCount,
2016 : bIsNullable, &OGRField::Real, i))
2017 0 : goto error;
2018 : }
2019 : else
2020 : {
2021 1580 : if (!FillArray<double>(psChild, oFeatureQueue,
2022 : nFeatureCount, bIsNullable,
2023 : &OGRField::Real, i))
2024 0 : goto error;
2025 : }
2026 2057 : break;
2027 : }
2028 :
2029 3761 : case OFTString:
2030 : case OFTWideString:
2031 : {
2032 3761 : const size_t nThisFeatureCount = FillStringArray<int32_t>(
2033 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2034 : nMemLimit);
2035 3761 : if (nThisFeatureCount == 0)
2036 : {
2037 19 : goto error_max_mem;
2038 : }
2039 3742 : if (nThisFeatureCount < nFeatureCount)
2040 1 : nFeatureCount = nThisFeatureCount;
2041 3742 : break;
2042 : }
2043 :
2044 910 : case OFTBinary:
2045 : {
2046 910 : const int nWidth = poFieldDefn->GetWidth();
2047 910 : if (nWidth > 0)
2048 : {
2049 7 : if (nFeatureCount > nMemLimit / nWidth)
2050 : {
2051 1 : nFeatureCount = nMemLimit / nWidth;
2052 1 : if (nFeatureCount == 0)
2053 0 : goto error_max_mem;
2054 : }
2055 7 : if (!FillFixedWidthBinaryArray(psChild, oFeatureQueue,
2056 : nFeatureCount, bIsNullable,
2057 : nWidth, i))
2058 0 : goto error;
2059 : }
2060 : else
2061 : {
2062 903 : const size_t nThisFeatureCount = FillBinaryArray<int32_t>(
2063 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2064 : nMemLimit);
2065 903 : if (nThisFeatureCount == 0)
2066 : {
2067 1 : goto error_max_mem;
2068 : }
2069 902 : if (nThisFeatureCount < nFeatureCount)
2070 1 : nFeatureCount = nThisFeatureCount;
2071 : }
2072 909 : break;
2073 : }
2074 :
2075 222 : case OFTIntegerList:
2076 : {
2077 : size_t nThisFeatureCount;
2078 222 : if (eSubType == OFSTBoolean)
2079 : {
2080 : nThisFeatureCount =
2081 47 : FillListArrayBool<int32_t, GetFromIntegerList>(
2082 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2083 : i, nMemLimit);
2084 : }
2085 175 : else if (eSubType == OFSTInt16)
2086 : {
2087 : nThisFeatureCount =
2088 28 : FillListArray<int32_t, int16_t, GetFromIntegerList>(
2089 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2090 : i, nMemLimit);
2091 : }
2092 : else
2093 : {
2094 : nThisFeatureCount =
2095 147 : FillListArray<int32_t, int32_t, GetFromIntegerList>(
2096 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2097 : i, nMemLimit);
2098 : }
2099 222 : if (nThisFeatureCount == 0)
2100 : {
2101 2 : goto error_max_mem;
2102 : }
2103 220 : if (nThisFeatureCount < nFeatureCount)
2104 2 : nFeatureCount = nThisFeatureCount;
2105 220 : break;
2106 : }
2107 :
2108 70 : case OFTInteger64List:
2109 : {
2110 : const size_t nThisFeatureCount =
2111 70 : FillListArray<int32_t, int64_t, GetFromInteger64List>(
2112 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2113 : nMemLimit);
2114 70 : if (nThisFeatureCount == 0)
2115 : {
2116 1 : goto error_max_mem;
2117 : }
2118 69 : if (nThisFeatureCount < nFeatureCount)
2119 1 : nFeatureCount = nThisFeatureCount;
2120 69 : break;
2121 : }
2122 :
2123 148 : case OFTRealList:
2124 : {
2125 : size_t nThisFeatureCount;
2126 148 : if (eSubType == OFSTFloat32)
2127 : {
2128 : nThisFeatureCount =
2129 39 : FillListArray<int32_t, float, GetFromRealList>(
2130 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2131 : i, nMemLimit);
2132 : }
2133 : else
2134 : {
2135 : nThisFeatureCount =
2136 109 : FillListArray<int32_t, double, GetFromRealList>(
2137 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2138 : i, nMemLimit);
2139 : }
2140 148 : if (nThisFeatureCount == 0)
2141 : {
2142 1 : goto error_max_mem;
2143 : }
2144 147 : if (nThisFeatureCount < nFeatureCount)
2145 1 : nFeatureCount = nThisFeatureCount;
2146 147 : break;
2147 : }
2148 :
2149 200 : case OFTStringList:
2150 : case OFTWideStringList:
2151 : {
2152 200 : const size_t nThisFeatureCount = FillStringListArray<int32_t>(
2153 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2154 : nMemLimit);
2155 200 : if (nThisFeatureCount == 0)
2156 : {
2157 1 : goto error_max_mem;
2158 : }
2159 199 : if (nThisFeatureCount < nFeatureCount)
2160 1 : nFeatureCount = nThisFeatureCount;
2161 199 : break;
2162 : }
2163 :
2164 123 : case OFTDate:
2165 : {
2166 123 : if (!FillDateArray(psChild, oFeatureQueue, nFeatureCount,
2167 : bIsNullable, i))
2168 0 : goto error;
2169 123 : break;
2170 : }
2171 :
2172 70 : case OFTTime:
2173 : {
2174 70 : if (!FillTimeArray(psChild, oFeatureQueue, nFeatureCount,
2175 : bIsNullable, i))
2176 0 : goto error;
2177 70 : break;
2178 : }
2179 :
2180 712 : case OFTDateTime:
2181 : {
2182 712 : if (!FillDateTimeArray(psChild, oFeatureQueue, nFeatureCount,
2183 : bIsNullable, i,
2184 : poFieldDefn->GetTZFlag()))
2185 0 : goto error;
2186 712 : break;
2187 : }
2188 : }
2189 : }
2190 2645 : for (int i = 0; i < nGeomFieldCount; ++i)
2191 : {
2192 1227 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
2193 1227 : if (poFieldDefn->IsIgnored())
2194 : {
2195 3 : continue;
2196 : }
2197 :
2198 2448 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2199 1224 : CPLCalloc(1, sizeof(struct ArrowArray)));
2200 1224 : auto psChild = out_array->children[iSchemaChild];
2201 1224 : ++iSchemaChild;
2202 1224 : psChild->release = OGRLayerDefaultReleaseArray;
2203 1224 : psChild->length = oFeatureQueue.size();
2204 1224 : const size_t nThisFeatureCount = FillWKBGeometryArray<int32_t>(
2205 : psChild, oFeatureQueue, nFeatureCount, poFieldDefn, i, nMemLimit);
2206 1224 : if (nThisFeatureCount == 0)
2207 : {
2208 0 : goto error_max_mem;
2209 : }
2210 1224 : if (nThisFeatureCount < nFeatureCount)
2211 1 : nFeatureCount = nThisFeatureCount;
2212 : }
2213 :
2214 : // Remove consumed features from the queue
2215 1418 : if (nFeatureCount == oFeatureQueue.size())
2216 1409 : oFeatureQueue.clear();
2217 : else
2218 : {
2219 27 : for (size_t i = 0; i < nFeatureCount; ++i)
2220 : {
2221 18 : oFeatureQueue.pop_front();
2222 : }
2223 : }
2224 :
2225 1418 : out_array->n_children = iSchemaChild;
2226 1418 : out_array->length = nFeatureCount;
2227 16017 : for (int i = 0; i < out_array->n_children; ++i)
2228 : {
2229 14599 : out_array->children[i]->length = nFeatureCount;
2230 : }
2231 :
2232 1418 : return 0;
2233 :
2234 25 : error_max_mem:
2235 25 : CPLError(CE_Failure, CPLE_AppDefined,
2236 : "Too large feature: not even a single feature can be returned");
2237 25 : error:
2238 25 : oFeatureQueue.clear();
2239 25 : poPrivate->poShared->m_bEOF = true;
2240 25 : out_array->release(out_array);
2241 25 : memset(out_array, 0, sizeof(*out_array));
2242 25 : return ENOMEM;
2243 : }
2244 :
2245 : /************************************************************************/
2246 : /* StaticGetNextArrowArray() */
2247 : /************************************************************************/
2248 :
2249 : /** Default implementation of the ArrowArrayStream::get_next() callback.
2250 : *
2251 : * To be used by driver implementations that have a custom GetArrowStream()
2252 : * implementation.
2253 : *
2254 : * @since GDAL 3.6
2255 : */
2256 4491 : int OGRLayer::StaticGetNextArrowArray(struct ArrowArrayStream *stream,
2257 : struct ArrowArray *out_array)
2258 : {
2259 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2260 4491 : stream->private_data)
2261 4491 : ->poShared->m_poLayer;
2262 4491 : if (poLayer == nullptr)
2263 : {
2264 1 : CPLError(CE_Failure, CPLE_NotSupported,
2265 : "Calling get_next() on a freed OGRLayer is not supported");
2266 1 : return EINVAL;
2267 : }
2268 4490 : return poLayer->GetNextArrowArray(stream, out_array);
2269 : }
2270 :
2271 : /************************************************************************/
2272 : /* ReleaseStream() */
2273 : /************************************************************************/
2274 :
2275 : /** Release a ArrowArrayStream.
2276 : *
2277 : * To be used by driver implementations that have a custom GetArrowStream()
2278 : * implementation.
2279 : *
2280 : * @param stream Arrow array stream to release.
2281 : * @since GDAL 3.6
2282 : */
2283 2202 : void OGRLayer::ReleaseStream(struct ArrowArrayStream *stream)
2284 : {
2285 2202 : assert(stream->release == OGRLayer::ReleaseStream);
2286 2202 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
2287 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2288 : stream->private_data);
2289 2202 : poPrivate->poShared->m_bArrowArrayStreamInProgress = false;
2290 2202 : poPrivate->poShared->m_bEOF = false;
2291 2202 : if (poPrivate->poShared->m_poLayer)
2292 2151 : poPrivate->poShared->m_poLayer->ResetReading();
2293 2202 : delete poPrivate;
2294 2202 : stream->private_data = nullptr;
2295 2202 : stream->release = nullptr;
2296 2202 : }
2297 :
2298 : /************************************************************************/
2299 : /* GetLastErrorArrowArrayStream() */
2300 : /************************************************************************/
2301 :
2302 : /** Default implementation of the ArrowArrayStream::get_last_error() callback.
2303 : *
2304 : * To be used by driver implementations that have a custom GetArrowStream()
2305 : * implementation.
2306 : *
2307 : * @since GDAL 3.6
2308 : */
2309 3 : const char *OGRLayer::GetLastErrorArrowArrayStream(struct ArrowArrayStream *)
2310 : {
2311 3 : const char *pszLastErrorMsg = CPLGetLastErrorMsg();
2312 3 : return pszLastErrorMsg[0] != '\0' ? pszLastErrorMsg : nullptr;
2313 : }
2314 :
2315 : /************************************************************************/
2316 : /* GetArrowStream() */
2317 : /************************************************************************/
2318 :
2319 : /** Get a Arrow C stream.
2320 : *
2321 : * On successful return, and when the stream interface is no longer needed, it
2322 : * must be freed with out_stream->release(out_stream). Please carefully
2323 : * read https://arrow.apache.org/docs/format/CStreamInterface.html for more
2324 : * details on using Arrow C stream.
2325 : *
2326 : * The method may take into account ignored fields set with SetIgnoredFields()
2327 : * (the default implementation does), and should take into account filters set
2328 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2329 : * specialized implementations may fallback to the default (slower)
2330 : * implementation when filters are set.
2331 : * Drivers that have a specialized implementation should advertise the
2332 : * OLCFastGetArrowStream capability.
2333 : *
2334 : * There are extra precautions to take into account in a OGR context. Unless
2335 : * otherwise specified by a particular driver implementation, the get_schema(),
2336 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2337 : * structure should no longer be used after the OGRLayer, from which the
2338 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2339 : * dataset closing). The reason is that those function pointers will typically
2340 : * point to methods of the OGRLayer instance.
2341 : * However, the ArrowSchema and ArrowArray structures filled from those
2342 : * callbacks can be used and must be released independently from the
2343 : * ArrowArrayStream or the layer.
2344 : *
2345 : * Furthermore, unless otherwise specified by a particular driver
2346 : * implementation, only one ArrowArrayStream can be active at a time on
2347 : * a given layer (that is the last active one must be explicitly released before
2348 : * a next one is asked). Changing filter state, ignored columns, modifying the
2349 : * schema or using ResetReading()/GetNextFeature() while using a
2350 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2351 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2352 : * should be called on a layer, while an ArrowArrayStream on it is active.
2353 : *
2354 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2355 : * get_schema() callback may be set with the potential following items:
2356 : * <ul>
2357 : * <li>"GDAL:OGR:alternative_name": value of
2358 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2359 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2360 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2361 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2362 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2363 : * string)</li>
2364 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2365 : * "true" or "false")</li>
2366 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2367 : * </ul>
2368 : *
2369 : * A potential usage can be:
2370 : \code{.cpp}
2371 : struct ArrowArrayStream stream;
2372 : if( !poLayer->GetArrowStream(&stream, nullptr))
2373 : {
2374 : CPLError(CE_Failure, CPLE_AppDefined, "GetArrowStream() failed\n");
2375 : exit(1);
2376 : }
2377 : struct ArrowSchema schema;
2378 : if( stream.get_schema(&stream, &schema) == 0 )
2379 : {
2380 : // Do something useful
2381 : schema.release(&schema);
2382 : }
2383 : while( true )
2384 : {
2385 : struct ArrowArray array;
2386 : // Look for an error (get_next() returning a non-zero code), or
2387 : // end of iteration (array.release == nullptr)
2388 : if( stream.get_next(&stream, &array) != 0 ||
2389 : array.release == nullptr )
2390 : {
2391 : break;
2392 : }
2393 : // Do something useful
2394 : array.release(&array);
2395 : }
2396 : stream.release(&stream);
2397 : \endcode
2398 : *
2399 : * A full example is available in the
2400 : * <a
2401 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2402 : From OGR using the Arrow C Stream data interface</a> tutorial.
2403 : *
2404 : * Options may be driver specific. The default implementation recognizes the
2405 : * following options:
2406 : * <ul>
2407 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to YES.
2408 : * </li>
2409 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2410 : * a ArrowArray batch. Defaults to 65 536.</li>
2411 : * <li>TIMEZONE="unknown", "UTC", "(+|-)HH:MM" or any other value supported by
2412 : * Arrow. (GDAL >= 3.8)
2413 : * Override the timezone flag nominally provided by
2414 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2415 : * declaration, with a user specified timezone.
2416 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2417 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2418 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2419 : * to UTC of a OGRField::Date is only done if both the timezone indicated by
2420 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2421 : * this TIMEZONE option) are not unknown.</li>
2422 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2423 : * The default is OGC, which will lead to setting
2424 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2425 : * If GEOMETRY_METADATA_ENCODING is set to GEOARROW,
2426 : * ARROW:extension:name=geoarrow.wkb and
2427 : * ARROW:extension:metadata={"crs": <projjson CRS representation>} are set.
2428 : * </li>
2429 : * </ul>
2430 : *
2431 : * The Arrow/Parquet drivers recognize the following option:
2432 : * <ul>
2433 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2434 : * when the native geometry encoding is not WKB. Otherwise the geometry
2435 : * will be returned with its native Arrow encoding
2436 : * (possibly using GeoArrow encoding).</li>
2437 : * </ul>
2438 : *
2439 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2440 : * structure does not need to be initialized.
2441 : * @param papszOptions NULL terminated list of key=value options.
2442 : * @return true in case of success.
2443 : * @since GDAL 3.6
2444 : */
2445 2206 : bool OGRLayer::GetArrowStream(struct ArrowArrayStream *out_stream,
2446 : CSLConstList papszOptions)
2447 : {
2448 2206 : memset(out_stream, 0, sizeof(*out_stream));
2449 3754 : if (m_poSharedArrowArrayStreamPrivateData &&
2450 3754 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress)
2451 : {
2452 4 : CPLError(CE_Failure, CPLE_AppDefined,
2453 : "An arrow Arrow Stream is in progress on that layer. Only "
2454 : "one at a time is allowed in this implementation.");
2455 4 : return false;
2456 : }
2457 2202 : m_aosArrowArrayStreamOptions.Assign(CSLDuplicate(papszOptions), true);
2458 :
2459 2202 : out_stream->get_schema = OGRLayer::StaticGetArrowSchema;
2460 2202 : out_stream->get_next = OGRLayer::StaticGetNextArrowArray;
2461 2202 : out_stream->get_last_error = OGRLayer::GetLastErrorArrowArrayStream;
2462 2202 : out_stream->release = OGRLayer::ReleaseStream;
2463 :
2464 2202 : if (m_poSharedArrowArrayStreamPrivateData == nullptr)
2465 : {
2466 : m_poSharedArrowArrayStreamPrivateData =
2467 658 : std::make_shared<ArrowArrayStreamPrivateData>();
2468 658 : m_poSharedArrowArrayStreamPrivateData->m_poLayer = this;
2469 : }
2470 2202 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress = true;
2471 :
2472 : // Special case for "FID = constant", or "FID IN (constant1, ...., constantN)"
2473 2202 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.clear();
2474 2202 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS = 0;
2475 2202 : if (m_poAttrQuery)
2476 : {
2477 : swq_expr_node *poNode =
2478 1054 : static_cast<swq_expr_node *>(m_poAttrQuery->GetSWQExpr());
2479 3162 : if (poNode->eNodeType == SNT_OPERATION &&
2480 1054 : (poNode->nOperation == SWQ_IN || poNode->nOperation == SWQ_EQ) &&
2481 825 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
2482 286 : poNode->papoSubExpr[0]->field_index ==
2483 2117 : GetLayerDefn()->GetFieldCount() + SPF_FID &&
2484 9 : TestCapability(OLCRandomRead))
2485 : {
2486 8 : std::set<GIntBig> oSetAlreadyListed;
2487 13 : for (int i = 1; i < poNode->nSubExprCount; ++i)
2488 : {
2489 27 : if (poNode->papoSubExpr[i]->eNodeType == SNT_CONSTANT &&
2490 18 : poNode->papoSubExpr[i]->field_type == SWQ_INTEGER64 &&
2491 9 : oSetAlreadyListed.find(poNode->papoSubExpr[i]->int_value) ==
2492 18 : oSetAlreadyListed.end())
2493 : {
2494 8 : oSetAlreadyListed.insert(poNode->papoSubExpr[i]->int_value);
2495 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
2496 8 : .push_back(poNode->papoSubExpr[i]->int_value);
2497 : }
2498 : }
2499 : }
2500 : }
2501 :
2502 2202 : auto poPrivateData = new ArrowArrayStreamPrivateDataSharedDataWrapper();
2503 2202 : poPrivateData->poShared = m_poSharedArrowArrayStreamPrivateData;
2504 2202 : out_stream->private_data = poPrivateData;
2505 2202 : return true;
2506 : }
2507 :
2508 : /************************************************************************/
2509 : /* OGR_L_GetArrowStream() */
2510 : /************************************************************************/
2511 :
2512 : /** Get a Arrow C stream.
2513 : *
2514 : * On successful return, and when the stream interface is no longer needed, it
2515 : * must be freed with out_stream->release(out_stream). Please carefully read
2516 : * https://arrow.apache.org/docs/format/CStreamInterface.html for more details
2517 : * on using Arrow C stream.
2518 : *
2519 : * The method may take into account ignored fields set with SetIgnoredFields()
2520 : * (the default implementation does), and should take into account filters set
2521 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2522 : * specialized implementations may fallback to the default (slower)
2523 : * implementation when filters are set.
2524 : * Drivers that have a specialized implementation should
2525 : * advertise the OLCFastGetArrowStream capability.
2526 : *
2527 : * There are extra precautions to take into account in a OGR context. Unless
2528 : * otherwise specified by a particular driver implementation, the get_schema(),
2529 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2530 : * structure should no longer be used after the OGRLayer, from which the
2531 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2532 : * dataset closing). The reason is that those function pointers will typically
2533 : * point to methods of the OGRLayer instance.
2534 : * However, the ArrowSchema and ArrowArray structures filled from those
2535 : * callbacks can be used and must be released independently from the
2536 : * ArrowArrayStream or the layer.
2537 : *
2538 : * Furthermore, unless otherwise specified by a particular driver
2539 : * implementation, only one ArrowArrayStream can be active at a time on
2540 : * a given layer (that is the last active one must be explicitly released before
2541 : * a next one is asked). Changing filter state, ignored columns, modifying the
2542 : * schema or using ResetReading()/GetNextFeature() while using a
2543 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2544 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2545 : * should be called on a layer, while an ArrowArrayStream on it is active.
2546 : *
2547 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2548 : * get_schema() callback may be set with the potential following items:
2549 : * <ul>
2550 : * <li>"GDAL:OGR:alternative_name": value of
2551 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2552 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2553 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2554 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2555 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2556 : * string)</li>
2557 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2558 : * "true" or "false")</li>
2559 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2560 : * </ul>
2561 : *
2562 : * A potential usage can be:
2563 : \code{.cpp}
2564 : struct ArrowArrayStream stream;
2565 : if( !OGR_L_GetArrowStream(hLayer, &stream, nullptr))
2566 : {
2567 : CPLError(CE_Failure, CPLE_AppDefined,
2568 : "OGR_L_GetArrowStream() failed\n");
2569 : exit(1);
2570 : }
2571 : struct ArrowSchema schema;
2572 : if( stream.get_schema(&stream, &schema) == 0 )
2573 : {
2574 : // Do something useful
2575 : schema.release(&schema);
2576 : }
2577 : while( true )
2578 : {
2579 : struct ArrowArray array;
2580 : // Look for an error (get_next() returning a non-zero code), or
2581 : // end of iteration (array.release == nullptr)
2582 : if( stream.get_next(&stream, &array) != 0 ||
2583 : array.release == nullptr )
2584 : {
2585 : break;
2586 : }
2587 : // Do something useful
2588 : array.release(&array);
2589 : }
2590 : stream.release(&stream);
2591 : \endcode
2592 : *
2593 : * A full example is available in the
2594 : * <a
2595 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2596 : From OGR using the Arrow C Stream data interface</a> tutorial.
2597 : *
2598 : * Options may be driver specific. The default implementation recognizes the
2599 : * following options:
2600 : * <ul>
2601 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to
2602 : YES.</li>
2603 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2604 : * a ArrowArray batch. Defaults to 65 536.</li>
2605 : * <li>TIMEZONE="unknown", "UTC", "(+|-)HH:MM" or any other value supported by
2606 : * Arrow. (GDAL >= 3.8)
2607 : * Override the timezone flag nominally provided by
2608 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2609 : * declaration, with a user specified timezone.
2610 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2611 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2612 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2613 : * to UTC of a OGRField::Date is only done if both the timezone indicated by
2614 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2615 : * this TIMEZONE option) are not unknown.</li>
2616 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2617 : * The default is OGC, which will lead to setting
2618 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2619 : * If GEOMETRY_METADATA_ENCODING is set to GEOARROW,
2620 : * ARROW:extension:name=geoarrow.wkb and
2621 : * ARROW:extension:metadata={"crs": <projjson CRS representation>} are set.
2622 : * </li>
2623 : * </ul>
2624 : *
2625 : * The Arrow/Parquet drivers recognize the following option:
2626 : * <ul>
2627 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2628 : * when the native geometry encoding is not WKB. Otherwise the geometry
2629 : * will be returned with its native Arrow encoding
2630 : * (possibly using GeoArrow encoding).</li>
2631 : * </ul>
2632 : *
2633 : * @param hLayer Layer
2634 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2635 : * structure does not need to be initialized.
2636 : * @param papszOptions NULL terminated list of key=value options.
2637 : * @return true in case of success.
2638 : * @since GDAL 3.6
2639 : */
2640 366 : bool OGR_L_GetArrowStream(OGRLayerH hLayer, struct ArrowArrayStream *out_stream,
2641 : char **papszOptions)
2642 : {
2643 366 : VALIDATE_POINTER1(hLayer, "OGR_L_GetArrowStream", false);
2644 366 : VALIDATE_POINTER1(out_stream, "OGR_L_GetArrowStream", false);
2645 :
2646 732 : return OGRLayer::FromHandle(hLayer)->GetArrowStream(out_stream,
2647 366 : papszOptions);
2648 : }
2649 :
2650 : /************************************************************************/
2651 : /* OGRParseArrowMetadata() */
2652 : /************************************************************************/
2653 :
/** Decode the key/value metadata blob attached to an Arrow schema.
 *
 * The blob is read as: an int32 number of pairs, followed for each pair by
 * an int32 key length, the key bytes, an int32 value length and the value
 * bytes. Integers are read in native byte order and strings are not
 * nul-terminated.
 *
 * The blob does not carry its total size, so reads cannot be bounds-checked
 * here. Parsing stops, returning the pairs decoded so far, as soon as a
 * negative length is met.
 *
 * @param pabyMetadata Serialized metadata, or nullptr.
 * @return the decoded pairs (empty if pabyMetadata is nullptr). When a key
 *         is repeated, the last value wins.
 */
std::map<std::string, std::string>
OGRParseArrowMetadata(const char *pabyMetadata)
{
    std::map<std::string, std::string> oMetadata;
    if (pabyMetadata == nullptr)
        return oMetadata;

    // Read one int32 at the cursor (which may be unaligned) and advance it.
    const auto ReadInt32 = [&pabyMetadata]()
    {
        int32_t nVal;
        memcpy(&nVal, pabyMetadata, sizeof(int32_t));
        pabyMetadata += sizeof(int32_t);
        return nVal;
    };

    const int32_t nKVP = ReadInt32();
    for (int32_t i = 0; i < nKVP; ++i)
    {
        const int32_t nSizeKey = ReadInt32();
        if (nSizeKey < 0)
        {
            // Corrupted blob: a negative length would otherwise be turned
            // into a huge size_t.
            break;
        }
        std::string osKey(pabyMetadata, static_cast<size_t>(nSizeKey));
        pabyMetadata += nSizeKey;

        const int32_t nSizeValue = ReadInt32();
        if (nSizeValue < 0)
            break;
        std::string osValue(pabyMetadata, static_cast<size_t>(nSizeValue));
        pabyMetadata += nSizeValue;

        oMetadata[std::move(osKey)] = std::move(osValue);
    }

    return oMetadata;
}
2682 :
2683 : /************************************************************************/
2684 : /* ParseDecimalFormat() */
2685 : /************************************************************************/
2686 :
/** Parse an Arrow decimal format string.
 *
 * Accepted forms:
 * - "d:19,10"     ==> decimal128 [precision 19, scale 10]
 * - "d:19,10,NNN" ==> decimal of bit width NNN [precision 19, scale 10]
 *
 * @param format Format string. The first two characters are skipped
 *               without being inspected.
 * @param[out] nPrecision Precision, or 0 if it could not be parsed.
 * @param[out] nScale Scale, or 0 if it could not be parsed.
 * @param[out] nWidthInBytes Width in bytes (16 when no bit width is given),
 *                           or 0 on failure.
 * @return true on success. false if the precision/scale separator is
 *         missing, or if the bit width is not a strictly positive multiple
 *         of 8.
 */
static bool ParseDecimalFormat(const char *format, int &nPrecision, int &nScale,
                               int &nWidthInBytes)
{
    constexpr int PREFIX_LEN = 2;  // length of "d:"

    nPrecision = 0;
    nScale = 0;
    nWidthInBytes = 0;

    // Do not look past the end of a string shorter than the prefix
    if (format[0] == '\0' || format[1] == '\0')
        return false;

    const char *pszFirstComma = strchr(format + PREFIX_LEN, ',');
    if (!pszFirstComma)
    {
        // shouldn't happen for well-formed schemas
        return false;
    }

    nPrecision = atoi(format + PREFIX_LEN);
    nScale = atoi(pszFirstComma + 1);

    const char *pszSecondComma = strchr(pszFirstComma + 1, ',');
    if (!pszSecondComma)
    {
        nWidthInBytes = 128 / 8;  // 128 bit
        return true;
    }

    const int nWidthInBits = atoi(pszSecondComma + 1);
    if (nWidthInBits <= 0 || (nWidthInBits % 8) != 0)
    {
        // shouldn't happen for well-formed schemas. Zero and negative widths
        // are rejected as callers use the width as a byte count.
        return false;
    }

    nWidthInBytes = nWidthInBits / 8;
    return true;
}
2724 :
2725 : /************************************************************************/
2726 : /* GetErrorIfUnsupportedDecimal() */
2727 : /************************************************************************/
2728 :
/** Return an error message if a decimal field cannot be handled.
 *
 * @param nWidthInBytes Width of the decimal, in bytes.
 * @param nPrecision Precision of the decimal.
 * @return nullptr if the decimal is supported, otherwise a static string
 *         describing the limitation.
 */
static const char *GetErrorIfUnsupportedDecimal(int nWidthInBytes,
                                                int nPrecision)
{
    const bool bWidthSupported =
        nWidthInBytes == 128 / 8 || nWidthInBytes == 256 / 8;
    if (!bWidthSupported)
        return "For decimal field, only width 128 and 256 are supported";

    // precision=19 fits on 64 bits
    const bool bPrecisionSupported = nPrecision >= 1 && nPrecision <= 19;
    return bPrecisionSupported
               ? nullptr
               : "For decimal field, only precision up to 19 is supported";
}
2746 :
2747 : /************************************************************************/
2748 : /* IsHandledSchema() */
2749 : /************************************************************************/
2750 :
2751 15760 : static bool IsHandledSchema(bool bTopLevel, const struct ArrowSchema *schema,
2752 : const std::string &osPrefix, bool bHasAttrQuery,
2753 : const CPLStringList &aosUsedFields)
2754 : {
2755 15760 : const char *format = schema->format;
2756 15760 : if (IsStructure(format))
2757 : {
2758 12285 : for (int64_t i = 0; i < schema->n_children; ++i)
2759 : {
2760 44156 : if (!IsHandledSchema(/* bTopLevel = */ false,
2761 11039 : schema->children[static_cast<size_t>(i)],
2762 24772 : bTopLevel ? std::string()
2763 13733 : : osPrefix + schema->name + ".",
2764 : bHasAttrQuery, aosUsedFields))
2765 : {
2766 0 : return false;
2767 : }
2768 : }
2769 1246 : return true;
2770 : }
2771 :
2772 : // Lists or maps
2773 25169 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format) ||
2774 10655 : IsMap(format))
2775 : {
2776 4566 : if (!IsHandledSchema(/* bTopLevel = */ false, schema->children[0],
2777 : osPrefix, bHasAttrQuery, aosUsedFields))
2778 : {
2779 0 : return false;
2780 : }
2781 : // For now, we can't filter on lists or maps
2782 4566 : if (aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
2783 : {
2784 0 : CPLDebug("OGR",
2785 : "Field %s has unhandled format '%s' for an "
2786 : "attribute to filter on",
2787 0 : (osPrefix + schema->name).c_str(), format);
2788 0 : return false;
2789 : }
2790 4566 : return true;
2791 : }
2792 :
2793 9948 : const char *const apszHandledFormats[] = {
2794 : "b", // boolean
2795 : "c", // int8
2796 : "C", // uint8
2797 : "s", // int16
2798 : "S", // uint16
2799 : "i", // int32
2800 : "I", // uint32
2801 : "l", // int64
2802 : "L", // uint64
2803 : "e", // float16
2804 : "f", // float32
2805 : "g", // float64,
2806 : "z", // binary
2807 : "Z", // large binary
2808 : "u", // UTF-8 string
2809 : "U", // large UTF-8 string
2810 : "tdD", // date32[days]
2811 : "tdm", // date64[milliseconds]
2812 : "tts", //time32 [seconds]
2813 : "ttm", //time32 [milliseconds]
2814 : "ttu", //time64 [microseconds]
2815 : "ttn", //time64 [nanoseconds]
2816 : };
2817 :
2818 115231 : for (const char *pszHandledFormat : apszHandledFormats)
2819 : {
2820 113923 : if (strcmp(format, pszHandledFormat) == 0)
2821 : {
2822 8640 : return true;
2823 : }
2824 : }
2825 :
2826 1308 : if (IsDecimal(format))
2827 : {
2828 790 : if (bHasAttrQuery &&
2829 790 : aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
2830 : {
2831 2 : int nPrecision = 0;
2832 2 : int nScale = 0;
2833 2 : int nWidthInBytes = 0;
2834 2 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
2835 : {
2836 0 : CPLDebug("OGR", "%s",
2837 0 : (std::string("Invalid field format ") + format +
2838 0 : " for field " + osPrefix + schema->name)
2839 : .c_str());
2840 0 : return false;
2841 : }
2842 :
2843 : const char *pszError =
2844 2 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
2845 2 : if (pszError)
2846 : {
2847 0 : CPLDebug("OGR", "%s", pszError);
2848 0 : return false;
2849 : }
2850 : }
2851 412 : return true;
2852 : }
2853 :
2854 896 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
2855 : {
2856 896 : return true;
2857 : }
2858 :
2859 0 : CPLDebug("OGR", "Field %s has unhandled format '%s'",
2860 0 : (osPrefix + schema->name).c_str(), format);
2861 0 : return false;
2862 : }
2863 :
2864 : /************************************************************************/
2865 : /* OGRLayer::CanPostFilterArrowArray() */
2866 : /************************************************************************/
2867 :
2868 : /** Whether the PostFilterArrowArray() can work on the schema to remove
2869 : * rows that aren't selected by the spatial or attribute filter.
2870 : */
2871 155 : bool OGRLayer::CanPostFilterArrowArray(const struct ArrowSchema *schema) const
2872 : {
2873 155 : if (!IsHandledSchema(
2874 155 : /* bTopLevel=*/true, schema, std::string(),
2875 155 : m_poAttrQuery != nullptr,
2876 310 : m_poAttrQuery ? CPLStringList(m_poAttrQuery->GetUsedFields())
2877 : : CPLStringList()))
2878 : {
2879 0 : return false;
2880 : }
2881 :
2882 155 : if (m_poFilterGeom)
2883 : {
2884 22 : bool bFound = false;
2885 : const char *pszGeomFieldName =
2886 : const_cast<OGRLayer *>(this)
2887 22 : ->GetLayerDefn()
2888 22 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
2889 22 : ->GetNameRef();
2890 839 : for (int64_t i = 0; i < schema->n_children; ++i)
2891 : {
2892 839 : const auto fieldSchema = schema->children[i];
2893 839 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
2894 : {
2895 23 : if (!IsBinary(fieldSchema->format) &&
2896 1 : !IsLargeBinary(fieldSchema->format))
2897 : {
2898 1 : CPLDebug("OGR", "Geometry field %s has handled format '%s'",
2899 : fieldSchema->name, fieldSchema->format);
2900 1 : return false;
2901 : }
2902 :
2903 : // Check if ARROW:extension:name = ogc.wkb
2904 21 : const char *pabyMetadata = fieldSchema->metadata;
2905 21 : if (!pabyMetadata)
2906 : {
2907 0 : CPLDebug(
2908 : "OGR",
2909 : "Geometry field %s lacks metadata in its schema field",
2910 : fieldSchema->name);
2911 0 : return false;
2912 : }
2913 :
2914 21 : const auto oMetadata = OGRParseArrowMetadata(pabyMetadata);
2915 21 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
2916 21 : if (oIter == oMetadata.end())
2917 : {
2918 0 : CPLDebug("OGR",
2919 : "Geometry field %s lacks "
2920 : "%s metadata "
2921 : "in its schema field",
2922 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY);
2923 0 : return false;
2924 : }
2925 21 : if (oIter->second != EXTENSION_NAME_OGC_WKB &&
2926 0 : oIter->second != EXTENSION_NAME_GEOARROW_WKB)
2927 : {
2928 0 : CPLDebug("OGR",
2929 : "Geometry field %s has unexpected "
2930 : "%s = '%s' metadata "
2931 : "in its schema field",
2932 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY,
2933 0 : oIter->second.c_str());
2934 0 : return false;
2935 : }
2936 :
2937 21 : bFound = true;
2938 21 : break;
2939 : }
2940 : }
2941 21 : if (!bFound)
2942 : {
2943 0 : CPLDebug("OGR", "Cannot find geometry field %s in schema",
2944 : pszGeomFieldName);
2945 0 : return false;
2946 : }
2947 : }
2948 :
2949 154 : return true;
2950 : }
2951 :
#if 0
/************************************************************************/
/*                      CheckValidityBuffer()                           */
/************************************************************************/

/** Debugging helper asserting that array->null_count matches the number of
 * unset bits of the validity buffer over the extent of the array.
 * Currently compiled out.
 */
static void CheckValidityBuffer(const struct ArrowArray *array)
{
    // Nothing to compare against when the count is negative
    if (array->null_count < 0)
        return;

    const auto pabyValidity =
        static_cast<const uint8_t *>(const_cast<const void *>(array->buffers[0]));
    if (pabyValidity == nullptr)
    {
        CPLAssert(array->null_count == 0);
        return;
    }

    const size_t nFirstBit = static_cast<size_t>(array->offset);
    const size_t nRows = static_cast<size_t>(array->length);
    size_t nNullsSeen = 0;
    for (size_t iRow = 0; iRow < nRows; ++iRow)
    {
        if (!TestBit(pabyValidity, nFirstBit + iRow))
            ++nNullsSeen;
    }
    CPLAssert(static_cast<size_t>(array->null_count) == nNullsSeen);
}
#endif
2978 :
2979 : /************************************************************************/
2980 : /* CompactValidityBuffer() */
2981 : /************************************************************************/
2982 :
2983 7610 : static void CompactValidityBuffer(
2984 : const struct ArrowSchema *, struct ArrowArray *array, size_t iStart,
2985 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
2986 : {
2987 : // Invalidate null_count as the same validity buffer may be used when
2988 : // scrolling batches, and this creates confusion if we try to set it
2989 : // to different values among the batches
2990 7610 : if (array->null_count <= 0)
2991 : {
2992 4186 : array->null_count = -1;
2993 4186 : return;
2994 : }
2995 3424 : array->null_count = -1;
2996 :
2997 3424 : CPLAssert(static_cast<size_t>(array->length) >=
2998 : iStart + abyValidityFromFilters.size());
2999 3424 : uint8_t *pabyValidity =
3000 3424 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[0]));
3001 3424 : const size_t nLength = abyValidityFromFilters.size();
3002 3424 : const size_t nOffset = static_cast<size_t>(array->offset);
3003 3424 : size_t j = iStart + nOffset;
3004 12563 : for (size_t i = 0; i < nLength && j < nNewLength + nOffset; ++i)
3005 : {
3006 9139 : if (abyValidityFromFilters[i])
3007 : {
3008 5663 : if (TestBit(pabyValidity, i + iStart + nOffset))
3009 4307 : SetBit(pabyValidity, j);
3010 : else
3011 1356 : UnsetBit(pabyValidity, j);
3012 5663 : ++j;
3013 : }
3014 : }
3015 : }
3016 :
3017 : /************************************************************************/
3018 : /* CompactBoolArray() */
3019 : /************************************************************************/
3020 :
3021 224 : static void CompactBoolArray(const struct ArrowSchema *schema,
3022 : struct ArrowArray *array, size_t iStart,
3023 : const std::vector<bool> &abyValidityFromFilters,
3024 : size_t nNewLength)
3025 : {
3026 224 : CPLAssert(array->n_children == 0);
3027 224 : CPLAssert(array->n_buffers == 2);
3028 224 : CPLAssert(static_cast<size_t>(array->length) >=
3029 : iStart + abyValidityFromFilters.size());
3030 :
3031 224 : const size_t nLength = abyValidityFromFilters.size();
3032 224 : const size_t nOffset = static_cast<size_t>(array->offset);
3033 224 : uint8_t *pabyData =
3034 224 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[1]));
3035 224 : size_t j = iStart + nOffset;
3036 1147 : for (size_t i = 0; i < nLength; ++i)
3037 : {
3038 923 : if (abyValidityFromFilters[i])
3039 : {
3040 424 : if (TestBit(pabyData, i + iStart + nOffset))
3041 199 : SetBit(pabyData, j);
3042 : else
3043 225 : UnsetBit(pabyData, j);
3044 :
3045 424 : ++j;
3046 : }
3047 : }
3048 :
3049 224 : if (schema->flags & ARROW_FLAG_NULLABLE)
3050 224 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3051 : nNewLength);
3052 :
3053 224 : array->length = nNewLength;
3054 224 : }
3055 :
3056 : /************************************************************************/
3057 : /* CompactPrimitiveArray() */
3058 : /************************************************************************/
3059 :
3060 : template <class T>
3061 3503 : static void CompactPrimitiveArray(
3062 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3063 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3064 : {
3065 3503 : CPLAssert(array->n_children == 0);
3066 3503 : CPLAssert(array->n_buffers == 2);
3067 3503 : CPLAssert(static_cast<size_t>(array->length) >=
3068 : iStart + abyValidityFromFilters.size());
3069 :
3070 3503 : const size_t nLength = abyValidityFromFilters.size();
3071 3503 : const size_t nOffset = static_cast<size_t>(array->offset);
3072 3503 : T *paData =
3073 3503 : static_cast<T *>(const_cast<void *>(array->buffers[1])) + nOffset;
3074 3503 : size_t j = iStart;
3075 17682 : for (size_t i = 0; i < nLength; ++i)
3076 : {
3077 14179 : if (abyValidityFromFilters[i])
3078 : {
3079 6206 : paData[j] = paData[i + iStart];
3080 6206 : ++j;
3081 : }
3082 : }
3083 :
3084 3503 : if (schema->flags & ARROW_FLAG_NULLABLE)
3085 3492 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3086 : nNewLength);
3087 :
3088 3503 : array->length = nNewLength;
3089 3503 : }
3090 :
3091 : /************************************************************************/
3092 : /* CompactStringOrBinaryArray() */
3093 : /************************************************************************/
3094 :
3095 : template <class OffsetType>
3096 1187 : static void CompactStringOrBinaryArray(
3097 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3098 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3099 : {
3100 1187 : CPLAssert(array->n_children == 0);
3101 1187 : CPLAssert(array->n_buffers == 3);
3102 1187 : CPLAssert(static_cast<size_t>(array->length) >=
3103 : iStart + abyValidityFromFilters.size());
3104 :
3105 1187 : const size_t nLength = abyValidityFromFilters.size();
3106 1187 : const size_t nOffset = static_cast<size_t>(array->offset);
3107 1187 : OffsetType *panOffsets =
3108 1187 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3109 : nOffset;
3110 1187 : GByte *pabyData =
3111 1187 : static_cast<GByte *>(const_cast<void *>(array->buffers[2]));
3112 1187 : size_t j = iStart;
3113 1187 : OffsetType nCurOffset = panOffsets[iStart];
3114 5103 : for (size_t i = 0; i < nLength; ++i)
3115 : {
3116 3916 : if (abyValidityFromFilters[i])
3117 : {
3118 1768 : const auto nStartOffset = panOffsets[i + iStart];
3119 1768 : const auto nEndOffset = panOffsets[i + iStart + 1];
3120 1768 : panOffsets[j] = nCurOffset;
3121 1768 : const auto nSize = static_cast<size_t>(nEndOffset - nStartOffset);
3122 1768 : if (nSize)
3123 : {
3124 1562 : if (nCurOffset < nStartOffset)
3125 : {
3126 636 : memmove(pabyData + nCurOffset, pabyData + nStartOffset,
3127 : nSize);
3128 : }
3129 1562 : nCurOffset += static_cast<OffsetType>(nSize);
3130 : }
3131 1768 : ++j;
3132 : }
3133 : }
3134 1187 : panOffsets[j] = nCurOffset;
3135 :
3136 1187 : if (schema->flags & ARROW_FLAG_NULLABLE)
3137 806 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3138 : nNewLength);
3139 :
3140 1187 : array->length = nNewLength;
3141 1187 : }
3142 :
3143 : /************************************************************************/
3144 : /* CompactFixedWidthArray() */
3145 : /************************************************************************/
3146 :
3147 : static void
3148 305 : CompactFixedWidthArray(const struct ArrowSchema *schema,
3149 : struct ArrowArray *array, int nWidth, size_t iStart,
3150 : const std::vector<bool> &abyValidityFromFilters,
3151 : size_t nNewLength)
3152 : {
3153 305 : CPLAssert(array->n_children == 0);
3154 305 : CPLAssert(array->n_buffers == 2);
3155 305 : CPLAssert(static_cast<size_t>(array->length) >=
3156 : iStart + abyValidityFromFilters.size());
3157 :
3158 305 : const size_t nLength = abyValidityFromFilters.size();
3159 305 : const size_t nOffset = static_cast<size_t>(array->offset);
3160 305 : GByte *pabyData =
3161 305 : static_cast<GByte *>(const_cast<void *>(array->buffers[1]));
3162 305 : size_t nStartOffset = (iStart + nOffset) * nWidth;
3163 305 : size_t nCurOffset = nStartOffset;
3164 1133 : for (size_t i = 0; i < nLength; ++i, nStartOffset += nWidth)
3165 : {
3166 828 : if (abyValidityFromFilters[i])
3167 : {
3168 391 : if (nCurOffset < nStartOffset)
3169 : {
3170 210 : memcpy(pabyData + nCurOffset, pabyData + nStartOffset, nWidth);
3171 : }
3172 391 : nCurOffset += nWidth;
3173 : }
3174 : }
3175 :
3176 305 : if (schema->flags & ARROW_FLAG_NULLABLE)
3177 305 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3178 : nNewLength);
3179 :
3180 305 : array->length = nNewLength;
3181 305 : }
3182 :
3183 : /************************************************************************/
3184 : /* CompactStructArray() */
3185 : /************************************************************************/
3186 :
3187 : static bool CompactArray(const struct ArrowSchema *schema,
3188 : struct ArrowArray *array, size_t iStart,
3189 : const std::vector<bool> &abyValidityFromFilters,
3190 : size_t nNewLength);
3191 :
3192 665 : static bool CompactStructArray(const struct ArrowSchema *schema,
3193 : struct ArrowArray *array, size_t iStart,
3194 : const std::vector<bool> &abyValidityFromFilters,
3195 : size_t nNewLength)
3196 : {
3197 : // The equality might not be strict in the case of when some sub-arrays
3198 : // are fully void !
3199 665 : CPLAssert(array->n_children <= schema->n_children);
3200 6646 : for (int64_t iField = 0; iField < array->n_children; ++iField)
3201 : {
3202 5981 : const auto psChildSchema = schema->children[iField];
3203 5981 : const auto psChildArray = array->children[iField];
3204 : // To please Arrow validation...
3205 5981 : const size_t nChildNewLength =
3206 5981 : static_cast<size_t>(array->offset) + nNewLength;
3207 5981 : if (psChildArray->length > array->length)
3208 : {
3209 120 : std::vector<bool> abyChildValidity(abyValidityFromFilters);
3210 120 : abyChildValidity.resize(
3211 120 : abyValidityFromFilters.size() +
3212 120 : static_cast<size_t>(psChildArray->length - array->length),
3213 : false);
3214 120 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3215 : abyChildValidity, nChildNewLength))
3216 : {
3217 0 : return false;
3218 : }
3219 : }
3220 : else
3221 : {
3222 5861 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3223 : abyValidityFromFilters, nChildNewLength))
3224 : {
3225 0 : return false;
3226 : }
3227 : }
3228 5981 : CPLAssert(psChildArray->length ==
3229 : static_cast<int64_t>(nChildNewLength));
3230 : }
3231 :
3232 665 : if (schema->flags & ARROW_FLAG_NULLABLE)
3233 201 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3234 : nNewLength);
3235 :
3236 665 : array->length = nNewLength;
3237 :
3238 665 : return true;
3239 : }
3240 :
3241 : /************************************************************************/
3242 : /* InvalidateNullCountRec() */
3243 : /************************************************************************/
3244 :
3245 570 : static void InvalidateNullCountRec(const struct ArrowSchema *schema,
3246 : struct ArrowArray *array)
3247 : {
3248 570 : if (schema->flags & ARROW_FLAG_NULLABLE)
3249 210 : array->null_count = -1;
3250 960 : for (int i = 0; i < array->n_children; ++i)
3251 390 : InvalidateNullCountRec(schema->children[i], array->children[i]);
3252 570 : }
3253 :
3254 : /************************************************************************/
3255 : /* CompactListArray() */
3256 : /************************************************************************/
3257 :
3258 : template <class OffsetType>
3259 1773 : static bool CompactListArray(const struct ArrowSchema *schema,
3260 : struct ArrowArray *array, size_t iStart,
3261 : const std::vector<bool> &abyValidityFromFilters,
3262 : size_t nNewLength)
3263 : {
3264 1773 : CPLAssert(static_cast<size_t>(array->length) >=
3265 : iStart + abyValidityFromFilters.size());
3266 1773 : CPLAssert(array->n_children == 1);
3267 1773 : CPLAssert(array->n_buffers == 2);
3268 :
3269 1773 : const auto psChildSchema = schema->children[0];
3270 1773 : const auto psChildArray = array->children[0];
3271 :
3272 1773 : const size_t nLength = abyValidityFromFilters.size();
3273 1773 : const size_t nOffset = static_cast<size_t>(array->offset);
3274 1773 : OffsetType *panOffsets =
3275 1773 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3276 : nOffset;
3277 :
3278 1773 : if (panOffsets[iStart + nLength] > panOffsets[iStart])
3279 : {
3280 3186 : std::vector<bool> abyChildValidity(
3281 1593 : static_cast<size_t>(panOffsets[iStart + nLength] -
3282 1593 : panOffsets[iStart]),
3283 : true);
3284 1593 : size_t j = iStart;
3285 1593 : OffsetType nCurOffset = panOffsets[iStart];
3286 6694 : for (size_t i = 0; i < nLength; ++i)
3287 : {
3288 5101 : if (abyValidityFromFilters[i])
3289 : {
3290 2142 : const auto nSize =
3291 2142 : panOffsets[i + iStart + 1] - panOffsets[i + iStart];
3292 2142 : panOffsets[j] = nCurOffset;
3293 2142 : nCurOffset += nSize;
3294 2142 : ++j;
3295 : }
3296 : else
3297 : {
3298 2959 : const auto nStartOffset = panOffsets[i + iStart];
3299 2959 : const auto nEndOffset = panOffsets[i + iStart + 1];
3300 2959 : if (nStartOffset != nEndOffset)
3301 : {
3302 3073 : if (nStartOffset >=
3303 1538 : panOffsets[iStart] + abyChildValidity.size())
3304 : {
3305 : // shouldn't happen in sane arrays...
3306 0 : CPLError(CE_Failure, CPLE_AppDefined,
3307 : "nStartOffset >= panOffsets[iStart] + "
3308 : "abyChildValidity.size()");
3309 0 : return false;
3310 : }
3311 : // nEndOffset might be equal to abyChildValidity.size()
3312 3073 : if (nEndOffset >
3313 1538 : panOffsets[iStart] + abyChildValidity.size())
3314 : {
3315 : // shouldn't happen in sane arrays...
3316 0 : CPLError(CE_Failure, CPLE_AppDefined,
3317 : "nEndOffset > panOffsets[iStart] + "
3318 : "abyChildValidity.size()");
3319 0 : return false;
3320 : }
3321 1538 : for (auto k = nStartOffset - panOffsets[iStart];
3322 4652 : k < nEndOffset - panOffsets[iStart]; ++k)
3323 3114 : abyChildValidity[static_cast<size_t>(k)] = false;
3324 : }
3325 : }
3326 : }
3327 1593 : panOffsets[j] = nCurOffset;
3328 1593 : const size_t nChildNewLength = static_cast<size_t>(panOffsets[j]);
3329 : // To please Arrow validation
3330 4552 : for (; j < iStart + nLength; ++j)
3331 2959 : panOffsets[j] = nCurOffset;
3332 :
3333 1593 : if (!CompactArray(psChildSchema, psChildArray,
3334 1593 : static_cast<size_t>(panOffsets[iStart]),
3335 : abyChildValidity, nChildNewLength))
3336 0 : return false;
3337 :
3338 1593 : CPLAssert(psChildArray->length ==
3339 : static_cast<int64_t>(nChildNewLength));
3340 : }
3341 : else
3342 : {
3343 180 : InvalidateNullCountRec(psChildSchema, psChildArray);
3344 : }
3345 :
3346 1773 : if (schema->flags & ARROW_FLAG_NULLABLE)
3347 1773 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3348 : nNewLength);
3349 :
3350 1773 : array->length = nNewLength;
3351 :
3352 1773 : return true;
3353 : }
3354 :
3355 : /************************************************************************/
3356 : /* CompactFixedSizeListArray() */
3357 : /************************************************************************/
3358 :
3359 : static bool
3360 809 : CompactFixedSizeListArray(const struct ArrowSchema *schema,
3361 : struct ArrowArray *array, size_t N, size_t iStart,
3362 : const std::vector<bool> &abyValidityFromFilters,
3363 : size_t nNewLength)
3364 : {
3365 809 : CPLAssert(static_cast<size_t>(array->length) >=
3366 : iStart + abyValidityFromFilters.size());
3367 809 : CPLAssert(array->n_children == 1);
3368 :
3369 809 : const auto psChildSchema = schema->children[0];
3370 809 : const auto psChildArray = array->children[0];
3371 :
3372 809 : const size_t nLength = abyValidityFromFilters.size();
3373 809 : const size_t nOffset = static_cast<size_t>(array->offset);
3374 1618 : std::vector<bool> abyChildValidity(N * nLength, true);
3375 809 : size_t nChildNewLength = (iStart + nOffset) * N;
3376 809 : size_t nSrcLength = 0;
3377 3198 : for (size_t i = 0; i < nLength; ++i)
3378 : {
3379 2389 : if (abyValidityFromFilters[i])
3380 : {
3381 1015 : nChildNewLength += N;
3382 1015 : nSrcLength++;
3383 : }
3384 : else
3385 : {
3386 1374 : const size_t nStartOffset = i * N;
3387 1374 : const size_t nEndOffset = (i + 1) * N;
3388 4122 : for (size_t k = nStartOffset; k < nEndOffset; ++k)
3389 2748 : abyChildValidity[k] = false;
3390 : }
3391 : }
3392 809 : CPL_IGNORE_RET_VAL(nSrcLength);
3393 809 : CPLAssert(iStart + nSrcLength == nNewLength);
3394 :
3395 809 : if (!CompactArray(psChildSchema, psChildArray, (iStart + nOffset) * N,
3396 : abyChildValidity, nChildNewLength))
3397 0 : return false;
3398 :
3399 809 : if (schema->flags & ARROW_FLAG_NULLABLE)
3400 809 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3401 : nNewLength);
3402 :
3403 809 : array->length = nNewLength;
3404 :
3405 809 : CPLAssert(psChildArray->length >=
3406 : static_cast<int64_t>(N) * (array->length + array->offset));
3407 :
3408 809 : return true;
3409 : }
3410 :
3411 : /************************************************************************/
3412 : /* CompactMapArray() */
3413 : /************************************************************************/
3414 :
3415 561 : static bool CompactMapArray(const struct ArrowSchema *schema,
3416 : struct ArrowArray *array, size_t iStart,
3417 : const std::vector<bool> &abyValidityFromFilters,
3418 : size_t nNewLength)
3419 : {
3420 561 : return CompactListArray<uint32_t>(schema, array, iStart,
3421 561 : abyValidityFromFilters, nNewLength);
3422 : }
3423 :
3424 : /************************************************************************/
3425 : /* CompactArray() */
3426 : /************************************************************************/
3427 :
3428 8383 : static bool CompactArray(const struct ArrowSchema *schema,
3429 : struct ArrowArray *array, size_t iStart,
3430 : const std::vector<bool> &abyValidityFromFilters,
3431 : size_t nNewLength)
3432 : {
3433 8383 : const char *format = schema->format;
3434 :
3435 8383 : if (IsStructure(format))
3436 : {
3437 582 : if (!CompactStructArray(schema, array, iStart, abyValidityFromFilters,
3438 : nNewLength))
3439 0 : return false;
3440 : }
3441 7801 : else if (IsList(format))
3442 : {
3443 1209 : if (!CompactListArray<uint32_t>(schema, array, iStart,
3444 : abyValidityFromFilters, nNewLength))
3445 0 : return false;
3446 : }
3447 6592 : else if (IsLargeList(format))
3448 : {
3449 3 : if (!CompactListArray<uint64_t>(schema, array, iStart,
3450 : abyValidityFromFilters, nNewLength))
3451 0 : return false;
3452 : }
3453 6589 : else if (IsMap(format))
3454 : {
3455 561 : if (!CompactMapArray(schema, array, iStart, abyValidityFromFilters,
3456 : nNewLength))
3457 0 : return false;
3458 : }
3459 6028 : else if (IsFixedSizeList(format))
3460 : {
3461 809 : const int N = GetFixedSizeList(format);
3462 809 : if (N <= 0)
3463 0 : return false;
3464 809 : if (!CompactFixedSizeListArray(schema, array, static_cast<size_t>(N),
3465 : iStart, abyValidityFromFilters,
3466 : nNewLength))
3467 0 : return false;
3468 : }
3469 5219 : else if (IsBoolean(format))
3470 : {
3471 224 : CompactBoolArray(schema, array, iStart, abyValidityFromFilters,
3472 : nNewLength);
3473 : }
3474 4995 : else if (IsInt8(format) || IsUInt8(format))
3475 : {
3476 444 : CompactPrimitiveArray<uint8_t>(schema, array, iStart,
3477 : abyValidityFromFilters, nNewLength);
3478 : }
3479 4551 : else if (IsInt16(format) || IsUInt16(format) || IsFloat16(format))
3480 : {
3481 458 : CompactPrimitiveArray<uint16_t>(schema, array, iStart,
3482 : abyValidityFromFilters, nNewLength);
3483 : }
3484 7893 : else if (IsInt32(format) || IsUInt32(format) || IsFloat32(format) ||
3485 11323 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
3486 3430 : strcmp(format, "ttm") == 0)
3487 : {
3488 794 : CompactPrimitiveArray<uint32_t>(schema, array, iStart,
3489 : abyValidityFromFilters, nNewLength);
3490 : }
3491 5879 : else if (IsInt64(format) || IsUInt64(format) || IsFloat64(format) ||
3492 1997 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
3493 5879 : strcmp(format, "ttn") == 0 || strncmp(format, "ts", 2) == 0)
3494 : {
3495 1807 : CompactPrimitiveArray<uint64_t>(schema, array, iStart,
3496 : abyValidityFromFilters, nNewLength);
3497 : }
3498 1492 : else if (IsString(format) || IsBinary(format))
3499 : {
3500 983 : CompactStringOrBinaryArray<uint32_t>(
3501 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3502 : }
3503 509 : else if (IsLargeString(format) || IsLargeBinary(format))
3504 : {
3505 204 : CompactStringOrBinaryArray<uint64_t>(
3506 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3507 : }
3508 305 : else if (IsFixedWidthBinary(format))
3509 : {
3510 67 : const int nWidth = GetFixedWithBinary(format);
3511 67 : CompactFixedWidthArray(schema, array, nWidth, iStart,
3512 : abyValidityFromFilters, nNewLength);
3513 : }
3514 238 : else if (IsDecimal(format))
3515 : {
3516 238 : int nPrecision = 0;
3517 238 : int nScale = 0;
3518 238 : int nWidthInBytes = 0;
3519 238 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
3520 : {
3521 0 : CPLError(CE_Failure, CPLE_AppDefined,
3522 : "Unexpected error in PostFilterArrowArray(): unhandled "
3523 : "field format: %s",
3524 : format);
3525 :
3526 0 : return false;
3527 : }
3528 238 : CompactFixedWidthArray(schema, array, nWidthInBytes, iStart,
3529 : abyValidityFromFilters, nNewLength);
3530 : }
3531 : else
3532 : {
3533 0 : CPLError(CE_Failure, CPLE_AppDefined,
3534 : "Unexpected error in CompactArray(): unhandled "
3535 : "field format: %s",
3536 : format);
3537 0 : return false;
3538 : }
3539 :
3540 8383 : return true;
3541 : }
3542 :
3543 : /************************************************************************/
3544 : /* FillValidityArrayFromWKBArray() */
3545 : /************************************************************************/
3546 :
3547 : template <class OffsetType>
3548 : static size_t
3549 21 : FillValidityArrayFromWKBArray(struct ArrowArray *array, const OGRLayer *poLayer,
3550 : std::vector<bool> &abyValidityFromFilters)
3551 : {
3552 21 : const size_t nLength = static_cast<size_t>(array->length);
3553 14 : const uint8_t *pabyValidity =
3554 21 : array->null_count == 0
3555 : ? nullptr
3556 7 : : static_cast<const uint8_t *>(array->buffers[0]);
3557 21 : const size_t nOffset = static_cast<size_t>(array->offset);
3558 21 : const OffsetType *panOffsets =
3559 21 : static_cast<const OffsetType *>(array->buffers[1]) + nOffset;
3560 21 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
3561 21 : OGREnvelope sEnvelope;
3562 21 : abyValidityFromFilters.resize(nLength);
3563 21 : size_t nCountIntersecting = 0;
3564 138 : for (size_t i = 0; i < nLength; ++i)
3565 : {
3566 117 : if (!pabyValidity || TestBit(pabyValidity, i + nOffset))
3567 : {
3568 110 : const GByte *pabyWKB = pabyData + panOffsets[i];
3569 110 : const size_t nWKBSize =
3570 110 : static_cast<size_t>(panOffsets[i + 1] - panOffsets[i]);
3571 110 : if (poLayer->FilterWKBGeometry(pabyWKB, nWKBSize,
3572 : /* bEnvelopeAlreadySet=*/false,
3573 : sEnvelope))
3574 : {
3575 29 : abyValidityFromFilters[i] = true;
3576 29 : nCountIntersecting++;
3577 : }
3578 : }
3579 : }
3580 21 : return nCountIntersecting;
3581 : }
3582 :
3583 : /************************************************************************/
3584 : /* ArrowTimestampToOGRDateTime() */
3585 : /************************************************************************/
3586 :
3587 107 : static void ArrowTimestampToOGRDateTime(int64_t nTimestamp,
3588 : int nInvFactorToSecond,
3589 : const char *pszTZ, OGRFeature &oFeature,
3590 : int iField)
3591 : {
3592 107 : double floatingPart = 0;
3593 107 : if (nInvFactorToSecond)
3594 : {
3595 107 : floatingPart =
3596 107 : (nTimestamp % nInvFactorToSecond) / double(nInvFactorToSecond);
3597 107 : nTimestamp /= nInvFactorToSecond;
3598 : }
3599 107 : int nTZFlag = 0;
3600 107 : const size_t nTZLen = strlen(pszTZ);
3601 107 : if ((nTZLen == 3 && strcmp(pszTZ, "UTC") == 0) ||
3602 0 : (nTZLen == 7 && strcmp(pszTZ, "Etc/UTC") == 0))
3603 : {
3604 17 : nTZFlag = 100;
3605 : }
3606 90 : else if (nTZLen == 6 && (pszTZ[0] == '+' || pszTZ[0] == '-') &&
3607 33 : pszTZ[3] == ':')
3608 : {
3609 33 : int nTZHour = atoi(pszTZ + 1);
3610 33 : int nTZMin = atoi(pszTZ + 4);
3611 33 : if (nTZHour >= 0 && nTZHour <= 14 && nTZMin >= 0 && nTZMin < 60 &&
3612 33 : (nTZMin % 15) == 0)
3613 : {
3614 33 : nTZFlag = (nTZHour * 4) + (nTZMin / 15);
3615 33 : if (pszTZ[0] == '+')
3616 : {
3617 24 : nTZFlag = 100 + nTZFlag;
3618 24 : nTimestamp += nTZHour * 3600 + nTZMin * 60;
3619 : }
3620 : else
3621 : {
3622 9 : nTZFlag = 100 - nTZFlag;
3623 9 : nTimestamp -= nTZHour * 3600 + nTZMin * 60;
3624 : }
3625 : }
3626 : }
3627 : struct tm dt;
3628 107 : CPLUnixTimeToYMDHMS(nTimestamp, &dt);
3629 107 : oFeature.SetField(iField, dt.tm_year + 1900, dt.tm_mon + 1, dt.tm_mday,
3630 : dt.tm_hour, dt.tm_min,
3631 107 : static_cast<float>(dt.tm_sec + floatingPart), nTZFlag);
3632 107 : }
3633 :
3634 : /************************************************************************/
3635 : /* BuildMapFieldNameToArrowPath() */
3636 : /************************************************************************/
3637 :
3638 : static void
3639 334 : BuildMapFieldNameToArrowPath(const struct ArrowSchema *schema,
3640 : std::map<std::string, std::vector<int>> &oMap,
3641 : const std::string &osPrefix,
3642 : std::vector<int> &anArrowPath)
3643 : {
3644 7833 : for (int64_t i = 0; i < schema->n_children; ++i)
3645 : {
3646 7499 : auto psChild = schema->children[i];
3647 7499 : anArrowPath.push_back(static_cast<int>(i));
3648 7499 : if (IsStructure(psChild->format))
3649 : {
3650 400 : std::string osNewPrefix(osPrefix);
3651 200 : osNewPrefix += psChild->name;
3652 200 : osNewPrefix += ".";
3653 200 : BuildMapFieldNameToArrowPath(psChild, oMap, osNewPrefix,
3654 : anArrowPath);
3655 : }
3656 : else
3657 : {
3658 7299 : oMap[osPrefix + psChild->name] = anArrowPath;
3659 : }
3660 7499 : anArrowPath.pop_back();
3661 : }
3662 334 : }
3663 :
3664 : /************************************************************************/
3665 : /* FillFieldList() */
3666 : /************************************************************************/
3667 :
3668 : template <typename ListOffsetType, typename ArrowType,
3669 : typename OGRType = ArrowType>
3670 167 : inline static void FillFieldList(const struct ArrowArray *array,
3671 : int iOGRFieldIdx, size_t nOffsettedIndex,
3672 : const struct ArrowArray *childArray,
3673 : OGRFeature &oFeature)
3674 : {
3675 167 : const auto panOffsets =
3676 167 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3677 : nOffsettedIndex;
3678 334 : std::vector<OGRType> aValues;
3679 167 : const auto *paValues =
3680 167 : static_cast<const ArrowType *>(childArray->buffers[1]);
3681 167 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3682 509 : i < static_cast<size_t>(panOffsets[1]); ++i)
3683 : {
3684 342 : aValues.push_back(static_cast<OGRType>(paValues[i]));
3685 : }
3686 167 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3687 : aValues.data());
3688 167 : }
3689 :
3690 : /************************************************************************/
3691 : /* FillFieldListFromBool() */
3692 : /************************************************************************/
3693 :
3694 : template <typename ListOffsetType>
3695 : inline static void
3696 16 : FillFieldListFromBool(const struct ArrowArray *array, int iOGRFieldIdx,
3697 : size_t nOffsettedIndex,
3698 : const struct ArrowArray *childArray, OGRFeature &oFeature)
3699 : {
3700 16 : const auto panOffsets =
3701 16 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3702 : nOffsettedIndex;
3703 32 : std::vector<int> aValues;
3704 16 : const auto *paValues = static_cast<const uint8_t *>(childArray->buffers[1]);
3705 16 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3706 48 : i < static_cast<size_t>(panOffsets[1]); ++i)
3707 : {
3708 32 : aValues.push_back(TestBit(paValues, i) ? 1 : 0);
3709 : }
3710 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3711 16 : aValues.data());
3712 16 : }
3713 :
3714 : /************************************************************************/
3715 : /* FillFieldListFromHalfFloat() */
3716 : /************************************************************************/
3717 :
3718 : template <typename ListOffsetType>
3719 8 : inline static void FillFieldListFromHalfFloat(
3720 : const struct ArrowArray *array, int iOGRFieldIdx, size_t nOffsettedIndex,
3721 : const struct ArrowArray *childArray, OGRFeature &oFeature)
3722 : {
3723 8 : const auto panOffsets =
3724 8 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3725 : nOffsettedIndex;
3726 16 : std::vector<double> aValues;
3727 8 : const auto *paValues =
3728 8 : static_cast<const uint16_t *>(childArray->buffers[1]);
3729 8 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3730 24 : i < static_cast<size_t>(panOffsets[1]); ++i)
3731 : {
3732 16 : const auto nFloat16AsUInt32 = CPLHalfToFloat(paValues[i]);
3733 : float f;
3734 16 : memcpy(&f, &nFloat16AsUInt32, sizeof(f));
3735 16 : aValues.push_back(f);
3736 : }
3737 8 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3738 8 : aValues.data());
3739 8 : }
3740 :
3741 : /************************************************************************/
3742 : /* FillFieldListFromString() */
3743 : /************************************************************************/
3744 :
3745 : template <typename ListOffsetType, typename StringOffsetType>
3746 32 : inline static void FillFieldListFromString(const struct ArrowArray *array,
3747 : int iOGRFieldIdx,
3748 : size_t nOffsettedIndex,
3749 : const struct ArrowArray *childArray,
3750 : OGRFeature &oFeature)
3751 : {
3752 32 : const auto panOffsets =
3753 32 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3754 : nOffsettedIndex;
3755 64 : CPLStringList aosVals;
3756 32 : const auto panSubOffsets =
3757 32 : static_cast<const StringOffsetType *>(childArray->buffers[1]);
3758 32 : const char *pszValues = static_cast<const char *>(childArray->buffers[2]);
3759 64 : std::string osTmp;
3760 90 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3761 90 : i < static_cast<size_t>(panOffsets[1]); ++i)
3762 : {
3763 58 : osTmp.assign(
3764 58 : pszValues + panSubOffsets[i],
3765 58 : static_cast<size_t>(panSubOffsets[i + 1] - panSubOffsets[i]));
3766 58 : aosVals.AddString(osTmp.c_str());
3767 : }
3768 32 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
3769 32 : }
3770 :
3771 : /************************************************************************/
3772 : /* FillFieldFixedSizeList() */
3773 : /************************************************************************/
3774 :
3775 : template <typename ArrowType, typename OGRType = ArrowType>
3776 120 : inline static void FillFieldFixedSizeList(
3777 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
3778 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
3779 : {
3780 240 : std::vector<OGRType> aValues;
3781 120 : const auto *paValues =
3782 120 : static_cast<const ArrowType *>(childArray->buffers[1]) +
3783 120 : childArray->offset + nOffsettedIndex * nItems;
3784 360 : for (int i = 0; i < nItems; ++i)
3785 : {
3786 240 : aValues.push_back(static_cast<OGRType>(paValues[i]));
3787 : }
3788 120 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3789 : aValues.data());
3790 120 : }
3791 :
3792 : /************************************************************************/
3793 : /* FillFieldFixedSizeListString() */
3794 : /************************************************************************/
3795 :
3796 : template <typename StringOffsetType>
3797 17 : inline static void FillFieldFixedSizeListString(
3798 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
3799 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
3800 : {
3801 34 : CPLStringList aosVals;
3802 17 : const auto panSubOffsets =
3803 17 : static_cast<const StringOffsetType *>(childArray->buffers[1]) +
3804 17 : childArray->offset + nOffsettedIndex * nItems;
3805 17 : const char *pszValues = static_cast<const char *>(childArray->buffers[2]);
3806 34 : std::string osTmp;
3807 51 : for (int i = 0; i < nItems; ++i)
3808 : {
3809 34 : osTmp.assign(
3810 34 : pszValues + panSubOffsets[i],
3811 34 : static_cast<size_t>(panSubOffsets[i + 1] - panSubOffsets[i]));
3812 34 : aosVals.AddString(osTmp.c_str());
3813 : }
3814 17 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
3815 17 : }
3816 :
3817 : /************************************************************************/
3818 : /* GetValue() */
3819 : /************************************************************************/
3820 :
/** Return element iFeature + array->offset of array->buffers[1], read as an
 * array of ArrowType.
 */
template <typename ArrowType>
inline static ArrowType GetValue(const struct ArrowArray *array,
                                 size_t iFeature)
{
    return static_cast<const ArrowType *>(
        array->buffers[1])[iFeature + array->offset];
}
3828 :
3829 12 : template <> bool GetValue<bool>(const struct ArrowArray *array, size_t iFeature)
3830 : {
3831 12 : const auto *pabyValues = static_cast<const uint8_t *>(array->buffers[1]);
3832 12 : return TestBit(pabyValues, iFeature + static_cast<size_t>(array->offset));
3833 : }
3834 :
3835 : /************************************************************************/
3836 : /* GetValueFloat16() */
3837 : /************************************************************************/
3838 :
3839 23 : static float GetValueFloat16(const struct ArrowArray *array, const size_t nIdx)
3840 : {
3841 23 : const auto *panValues = static_cast<const uint16_t *>(array->buffers[1]);
3842 : const auto nFloat16AsUInt32 =
3843 23 : CPLHalfToFloat(panValues[nIdx + array->offset]);
3844 : float f;
3845 23 : memcpy(&f, &nFloat16AsUInt32, sizeof(f));
3846 23 : return f;
3847 : }
3848 :
3849 : /************************************************************************/
3850 : /* GetValueDecimal() */
3851 : /************************************************************************/
3852 :
3853 71 : static double GetValueDecimal(const struct ArrowArray *array,
3854 : const int nWidthIn64BitWord, const int nScale,
3855 : const size_t nIdx)
3856 : {
3857 : #ifdef CPL_LSB
3858 71 : const auto nIdxIn64BitWord = nIdx * nWidthIn64BitWord;
3859 : #else
3860 : const auto nIdxIn64BitWord =
3861 : nIdx * nWidthIn64BitWord + nWidthIn64BitWord - 1;
3862 : #endif
3863 71 : const auto *panValues = static_cast<const int64_t *>(array->buffers[1]);
3864 71 : const auto nVal =
3865 71 : panValues[nIdxIn64BitWord + array->offset * nWidthIn64BitWord];
3866 71 : return static_cast<double>(nVal) * std::pow(10.0, -nScale);
3867 : }
3868 :
3869 : /************************************************************************/
3870 : /* GetString() */
3871 : /************************************************************************/
3872 :
/** Return element nIdx of a string array as a std::string.
 *
 * The string bounds are read from entries array->offset + nIdx and
 * array->offset + nIdx + 1 of the offsets buffer (array->buffers[1]); the
 * bytes come from array->buffers[2].
 *
 * @tparam OffsetType Integer type of the offsets.
 */
template <class OffsetType>
static std::string GetString(const struct ArrowArray *array, const size_t nIdx)
{
    const char *pachChars = static_cast<const char *>(array->buffers[2]);
    const OffsetType *panBounds =
        static_cast<const OffsetType *>(array->buffers[1]) +
        static_cast<size_t>(array->offset) + nIdx;
    const size_t nBegin = static_cast<size_t>(panBounds[0]);
    const size_t nLen = static_cast<size_t>(panBounds[1] - panBounds[0]);
    return std::string(pachChars + nBegin, nLen);
}
3885 :
3886 : /************************************************************************/
3887 : /* GetBinaryAsBase64() */
3888 : /************************************************************************/
3889 :
3890 : template <class OffsetType>
3891 8 : static std::string GetBinaryAsBase64(const struct ArrowArray *array,
3892 : const size_t nIdx)
3893 : {
3894 8 : const OffsetType *panOffsets =
3895 8 : static_cast<const OffsetType *>(array->buffers[1]) +
3896 8 : static_cast<size_t>(array->offset) + nIdx;
3897 8 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
3898 8 : const size_t nLen = static_cast<size_t>(panOffsets[1] - panOffsets[0]);
3899 8 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
3900 : {
3901 0 : CPLError(CE_Failure, CPLE_AppDefined, "Too large binary");
3902 0 : return std::string();
3903 : }
3904 16 : char *pszVal = CPLBase64Encode(
3905 8 : static_cast<int>(nLen), pabyData + static_cast<size_t>(panOffsets[0]));
3906 16 : std::string osStr(pszVal);
3907 8 : CPLFree(pszVal);
3908 8 : return osStr;
3909 : }
3910 :
3911 : /************************************************************************/
3912 : /* GetValueFixedWithBinaryAsBase64() */
3913 : /************************************************************************/
3914 :
3915 : static std::string
3916 4 : GetValueFixedWithBinaryAsBase64(const struct ArrowArray *array,
3917 : const int nWidth, const size_t nIdx)
3918 : {
3919 4 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[1]);
3920 8 : char *pszVal = CPLBase64Encode(
3921 : nWidth,
3922 4 : pabyData + (static_cast<size_t>(array->offset) + nIdx) * nWidth);
3923 4 : std::string osStr(pszVal);
3924 4 : CPLFree(pszVal);
3925 4 : return osStr;
3926 : }
3927 :
3928 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
3929 : const struct ArrowArray *array,
3930 : const size_t nIdx);
3931 :
3932 : /************************************************************************/
3933 : /* AddToArray() */
3934 : /************************************************************************/
3935 :
3936 142 : static void AddToArray(CPLJSONArray &oArray, const struct ArrowSchema *schema,
3937 : const struct ArrowArray *array, const size_t nIdx)
3938 : {
3939 142 : if (IsBoolean(schema->format))
3940 7 : oArray.Add(GetValue<bool>(array, nIdx));
3941 135 : else if (IsUInt8(schema->format))
3942 13 : oArray.Add(GetValue<uint8_t>(array, nIdx));
3943 122 : else if (IsInt8(schema->format))
3944 7 : oArray.Add(GetValue<int8_t>(array, nIdx));
3945 115 : else if (IsUInt16(schema->format))
3946 7 : oArray.Add(GetValue<uint16_t>(array, nIdx));
3947 108 : else if (IsInt16(schema->format))
3948 7 : oArray.Add(GetValue<int16_t>(array, nIdx));
3949 101 : else if (IsUInt32(schema->format))
3950 7 : oArray.Add(static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
3951 94 : else if (IsInt32(schema->format))
3952 7 : oArray.Add(GetValue<int32_t>(array, nIdx));
3953 87 : else if (IsUInt64(schema->format))
3954 7 : oArray.Add(GetValue<uint64_t>(array, nIdx));
3955 80 : else if (IsInt64(schema->format))
3956 7 : oArray.Add(static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
3957 73 : else if (IsFloat16(schema->format))
3958 7 : oArray.Add(GetValueFloat16(array, nIdx));
3959 66 : else if (IsFloat32(schema->format))
3960 7 : oArray.Add(GetValue<float>(array, nIdx));
3961 59 : else if (IsFloat64(schema->format))
3962 7 : oArray.Add(GetValue<double>(array, nIdx));
3963 52 : else if (IsString(schema->format))
3964 13 : oArray.Add(GetString<uint32_t>(array, nIdx));
3965 39 : else if (IsLargeString(schema->format))
3966 4 : oArray.Add(GetString<uint64_t>(array, nIdx));
3967 35 : else if (IsBinary(schema->format))
3968 2 : oArray.Add(GetBinaryAsBase64<uint32_t>(array, nIdx));
3969 33 : else if (IsLargeBinary(schema->format))
3970 2 : oArray.Add(GetBinaryAsBase64<uint64_t>(array, nIdx));
3971 31 : else if (IsFixedWidthBinary(schema->format))
3972 2 : oArray.Add(GetValueFixedWithBinaryAsBase64(
3973 2 : array, GetFixedWithBinary(schema->format), nIdx));
3974 29 : else if (IsDecimal(schema->format))
3975 : {
3976 7 : int nPrecision = 0;
3977 7 : int nScale = 0;
3978 7 : int nWidthInBytes = 0;
3979 7 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
3980 7 : nWidthInBytes);
3981 : // Already validated
3982 7 : CPLAssert(bOK);
3983 7 : CPL_IGNORE_RET_VAL(bOK);
3984 7 : oArray.Add(GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
3985 : }
3986 : else
3987 22 : oArray.Add(GetObjectAsJSON(schema, array, nIdx));
3988 142 : }
3989 :
3990 : /************************************************************************/
3991 : /* GetListAsJSON() */
3992 : /************************************************************************/
3993 :
3994 : template <class OffsetType>
3995 112 : static CPLJSONArray GetListAsJSON(const struct ArrowSchema *schema,
3996 : const struct ArrowArray *array,
3997 : const size_t nIdx)
3998 : {
3999 112 : CPLJSONArray oArray;
4000 112 : const auto panOffsets = static_cast<const OffsetType *>(array->buffers[1]) +
4001 112 : array->offset + nIdx;
4002 112 : const auto childSchema = schema->children[0];
4003 112 : const auto childArray = array->children[0];
4004 5 : const uint8_t *pabyValidity =
4005 112 : childArray->null_count == 0
4006 : ? nullptr
4007 107 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4008 278 : for (size_t k = static_cast<size_t>(panOffsets[0]);
4009 278 : k < static_cast<size_t>(panOffsets[1]); k++)
4010 : {
4011 318 : if (!pabyValidity ||
4012 152 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4013 : {
4014 136 : AddToArray(oArray, childSchema, childArray, k);
4015 : }
4016 : else
4017 : {
4018 30 : oArray.AddNull();
4019 : }
4020 : }
4021 112 : return oArray;
4022 : }
4023 :
4024 : /************************************************************************/
4025 : /* GetFixedSizeListAsJSON() */
4026 : /************************************************************************/
4027 :
4028 3 : static CPLJSONArray GetFixedSizeListAsJSON(const struct ArrowSchema *schema,
4029 : const struct ArrowArray *array,
4030 : const size_t nIdx)
4031 : {
4032 3 : CPLJSONArray oArray;
4033 3 : const int nVals = GetFixedSizeList(schema->format);
4034 3 : const auto childSchema = schema->children[0];
4035 3 : const auto childArray = array->children[0];
4036 3 : const uint8_t *pabyValidity =
4037 3 : childArray->null_count == 0
4038 3 : ? nullptr
4039 3 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4040 9 : for (size_t k = nIdx * nVals; k < (nIdx + 1) * nVals; k++)
4041 : {
4042 12 : if (!pabyValidity ||
4043 6 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4044 : {
4045 6 : AddToArray(oArray, childSchema, childArray, k);
4046 : }
4047 : else
4048 : {
4049 0 : oArray.AddNull();
4050 : }
4051 : }
4052 3 : return oArray;
4053 : }
4054 :
4055 : /************************************************************************/
4056 : /* AddToDict() */
4057 : /************************************************************************/
4058 :
4059 198 : static void AddToDict(CPLJSONObject &oDict, const std::string &osKey,
4060 : const struct ArrowSchema *schema,
4061 : const struct ArrowArray *array, const size_t nIdx)
4062 : {
4063 198 : if (IsBoolean(schema->format))
4064 5 : oDict.Add(osKey, GetValue<bool>(array, nIdx));
4065 193 : else if (IsUInt8(schema->format))
4066 5 : oDict.Add(osKey, GetValue<uint8_t>(array, nIdx));
4067 188 : else if (IsInt8(schema->format))
4068 5 : oDict.Add(osKey, GetValue<int8_t>(array, nIdx));
4069 183 : else if (IsUInt16(schema->format))
4070 5 : oDict.Add(osKey, GetValue<uint16_t>(array, nIdx));
4071 178 : else if (IsInt16(schema->format))
4072 5 : oDict.Add(osKey, GetValue<int16_t>(array, nIdx));
4073 173 : else if (IsUInt32(schema->format))
4074 2 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
4075 171 : else if (IsInt32(schema->format))
4076 6 : oDict.Add(osKey, GetValue<int32_t>(array, nIdx));
4077 165 : else if (IsUInt64(schema->format))
4078 5 : oDict.Add(osKey, GetValue<uint64_t>(array, nIdx));
4079 160 : else if (IsInt64(schema->format))
4080 22 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
4081 138 : else if (IsFloat16(schema->format))
4082 2 : oDict.Add(osKey, GetValueFloat16(array, nIdx));
4083 136 : else if (IsFloat32(schema->format))
4084 5 : oDict.Add(osKey, GetValue<float>(array, nIdx));
4085 131 : else if (IsFloat64(schema->format))
4086 19 : oDict.Add(osKey, GetValue<double>(array, nIdx));
4087 112 : else if (IsString(schema->format))
4088 14 : oDict.Add(osKey, GetString<uint32_t>(array, nIdx));
4089 98 : else if (IsLargeString(schema->format))
4090 2 : oDict.Add(osKey, GetString<uint64_t>(array, nIdx));
4091 96 : else if (IsBinary(schema->format))
4092 2 : oDict.Add(osKey, GetBinaryAsBase64<uint32_t>(array, nIdx));
4093 94 : else if (IsLargeBinary(schema->format))
4094 2 : oDict.Add(osKey, GetBinaryAsBase64<uint64_t>(array, nIdx));
4095 92 : else if (IsFixedWidthBinary(schema->format))
4096 2 : oDict.Add(osKey, GetValueFixedWithBinaryAsBase64(
4097 2 : array, GetFixedWithBinary(schema->format), nIdx));
4098 90 : else if (IsDecimal(schema->format))
4099 : {
4100 8 : int nPrecision = 0;
4101 8 : int nScale = 0;
4102 8 : int nWidthInBytes = 0;
4103 8 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
4104 8 : nWidthInBytes);
4105 : // Already validated
4106 8 : CPLAssert(bOK);
4107 8 : CPL_IGNORE_RET_VAL(bOK);
4108 8 : oDict.Add(osKey,
4109 : GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
4110 : }
4111 : else
4112 82 : oDict.Add(osKey, GetObjectAsJSON(schema, array, nIdx));
4113 198 : }
4114 :
4115 : /************************************************************************/
4116 : /* GetMapAsJSON() */
4117 : /************************************************************************/
4118 :
4119 243 : static CPLJSONObject GetMapAsJSON(const struct ArrowSchema *schema,
4120 : const struct ArrowArray *array,
4121 : const size_t nIdx)
4122 : {
4123 243 : const auto schemaStruct = schema->children[0];
4124 243 : if (!IsStructure(schemaStruct->format))
4125 : {
4126 0 : CPLError(CE_Failure, CPLE_AppDefined,
4127 : "GetMapAsJSON(): !IsStructure(schemaStruct->format))");
4128 0 : return CPLJSONObject();
4129 : }
4130 243 : const auto schemaKey = schemaStruct->children[0];
4131 243 : const auto schemaValues = schemaStruct->children[1];
4132 243 : if (!IsString(schemaKey->format))
4133 : {
4134 0 : CPLError(CE_Failure, CPLE_AppDefined,
4135 : "GetMapAsJSON(): !IsString(schemaKey->format))");
4136 0 : return CPLJSONObject();
4137 : }
4138 243 : const auto arrayKeys = array->children[0]->children[0];
4139 243 : const auto arrayValues = array->children[0]->children[1];
4140 :
4141 486 : CPLJSONObject oDict;
4142 243 : const auto panOffsets =
4143 243 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset + nIdx;
4144 243 : const uint8_t *pabyValidityKeys =
4145 243 : arrayKeys->null_count == 0
4146 243 : ? nullptr
4147 0 : : static_cast<const uint8_t *>(arrayKeys->buffers[0]);
4148 243 : const uint32_t *panOffsetsKeys =
4149 243 : static_cast<const uint32_t *>(arrayKeys->buffers[1]) +
4150 243 : arrayKeys->offset;
4151 243 : const char *pabyKeys = static_cast<const char *>(arrayKeys->buffers[2]);
4152 243 : const uint8_t *pabyValidityValues =
4153 243 : arrayValues->null_count == 0
4154 243 : ? nullptr
4155 237 : : static_cast<const uint8_t *>(arrayValues->buffers[0]);
4156 463 : for (uint32_t k = panOffsets[0]; k < panOffsets[1]; k++)
4157 : {
4158 220 : if (!pabyValidityKeys ||
4159 0 : TestBit(pabyValidityKeys,
4160 0 : k + static_cast<size_t>(arrayKeys->offset)))
4161 : {
4162 440 : std::string osKey;
4163 220 : osKey.assign(pabyKeys + panOffsetsKeys[k],
4164 220 : panOffsetsKeys[k + 1] - panOffsetsKeys[k]);
4165 :
4166 433 : if (!pabyValidityValues ||
4167 213 : TestBit(pabyValidityValues,
4168 213 : k + static_cast<size_t>(arrayValues->offset)))
4169 : {
4170 168 : AddToDict(oDict, osKey, schemaValues, arrayValues, k);
4171 : }
4172 : else
4173 : {
4174 52 : oDict.AddNull(osKey);
4175 : }
4176 : }
4177 : }
4178 243 : return oDict;
4179 : }
4180 :
4181 : /************************************************************************/
4182 : /* GetStructureAsJSON() */
4183 : /************************************************************************/
4184 :
4185 16 : static CPLJSONObject GetStructureAsJSON(const struct ArrowSchema *schema,
4186 : const struct ArrowArray *array,
4187 : const size_t nIdx)
4188 : {
4189 16 : CPLJSONObject oDict;
4190 62 : for (int64_t k = 0; k < array->n_children; k++)
4191 : {
4192 46 : const uint8_t *pabyValidityValues =
4193 46 : array->children[k]->null_count == 0
4194 46 : ? nullptr
4195 36 : : static_cast<const uint8_t *>(array->children[k]->buffers[0]);
4196 82 : if (!pabyValidityValues ||
4197 36 : TestBit(pabyValidityValues,
4198 36 : nIdx + static_cast<size_t>(array->children[k]->offset)))
4199 : {
4200 30 : AddToDict(oDict, schema->children[k]->name, schema->children[k],
4201 30 : array->children[k], nIdx);
4202 : }
4203 : else
4204 : {
4205 16 : oDict.AddNull(schema->children[k]->name);
4206 : }
4207 : }
4208 16 : return oDict;
4209 : }
4210 :
4211 : /************************************************************************/
4212 : /* GetObjectAsJSON() */
4213 : /************************************************************************/
4214 :
4215 104 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4216 : const struct ArrowArray *array,
4217 : const size_t nIdx)
4218 : {
4219 104 : if (IsMap(schema->format))
4220 4 : return GetMapAsJSON(schema, array, nIdx);
4221 100 : else if (IsList(schema->format))
4222 156 : return GetListAsJSON<uint32_t>(schema, array, nIdx);
4223 22 : else if (IsLargeList(schema->format))
4224 6 : return GetListAsJSON<uint64_t>(schema, array, nIdx);
4225 19 : else if (IsFixedSizeList(schema->format))
4226 6 : return GetFixedSizeListAsJSON(schema, array, nIdx);
4227 16 : else if (IsStructure(schema->format))
4228 16 : return GetStructureAsJSON(schema, array, nIdx);
4229 : else
4230 : {
4231 0 : CPLError(CE_Failure, CPLE_AppDefined,
4232 : "GetObjectAsJSON(): unhandled value format: %s",
4233 0 : schema->format);
4234 0 : return CPLJSONObject();
4235 : }
4236 : }
4237 :
4238 : /************************************************************************/
4239 : /* SetFieldForOtherFormats() */
4240 : /************************************************************************/
4241 :
4242 856 : static bool SetFieldForOtherFormats(OGRFeature &oFeature,
4243 : const int iOGRFieldIndex,
4244 : const size_t nOffsettedIndex,
4245 : const struct ArrowSchema *schema,
4246 : const struct ArrowArray *array)
4247 : {
4248 856 : const char *format = schema->format;
4249 856 : if (IsFloat16(format))
4250 : {
4251 4 : oFeature.SetField(
4252 : iOGRFieldIndex,
4253 4 : GetValueFloat16(array, nOffsettedIndex -
4254 4 : static_cast<size_t>(array->offset)));
4255 : }
4256 :
4257 852 : else if (IsFixedWidthBinary(format))
4258 : {
4259 : // Fixed width binary
4260 17 : const int nWidth = GetFixedWithBinary(format);
4261 17 : oFeature.SetField(iOGRFieldIndex, nWidth,
4262 17 : static_cast<const GByte *>(array->buffers[1]) +
4263 17 : nOffsettedIndex * nWidth);
4264 : }
4265 835 : else if (format[0] == 't' && format[1] == 'd' &&
4266 38 : format[2] == 'D') // strcmp(format, "tdD") == 0
4267 : {
4268 : // date32[days]
4269 : // number of days since Epoch
4270 33 : int64_t timestamp = static_cast<int64_t>(static_cast<const int32_t *>(
4271 33 : array->buffers[1])[nOffsettedIndex]) *
4272 : 3600 * 24;
4273 : struct tm dt;
4274 33 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4275 33 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4276 : dt.tm_mday, 0, 0, 0);
4277 33 : return true;
4278 : }
4279 802 : else if (format[0] == 't' && format[1] == 'd' &&
4280 5 : format[2] == 'm') // strcmp(format, "tdm") == 0
4281 : {
4282 : // date64[milliseconds]
4283 : // number of milliseconds since Epoch
4284 5 : int64_t timestamp =
4285 5 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex] /
4286 : 1000;
4287 : struct tm dt;
4288 5 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4289 5 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4290 5 : dt.tm_mday, 0, 0, 0);
4291 : }
4292 797 : else if (format[0] == 't' && format[1] == 't' &&
4293 39 : format[2] == 's') // strcmp(format, "tts") == 0
4294 : {
4295 : // time32 [seconds]
4296 0 : int32_t value =
4297 0 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4298 0 : const int nHour = value / 3600;
4299 0 : const int nMinute = (value / 60) % 60;
4300 0 : const int nSecond = value % 60;
4301 0 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4302 0 : static_cast<float>(nSecond));
4303 : }
4304 797 : else if (format[0] == 't' && format[1] == 't' &&
4305 39 : format[2] == 'm') // strcmp(format, "ttm") == 0
4306 : {
4307 : // time32 [milliseconds]
4308 25 : int32_t value =
4309 25 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4310 25 : double floatingPart = (value % 1000) / 1e3;
4311 25 : value /= 1000;
4312 25 : const int nHour = value / 3600;
4313 25 : const int nMinute = (value / 60) % 60;
4314 25 : const int nSecond = value % 60;
4315 25 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4316 25 : static_cast<float>(nSecond + floatingPart));
4317 : }
4318 772 : else if (format[0] == 't' && format[1] == 't' &&
4319 14 : (format[2] == 'u' || // time64 [microseconds]
4320 7 : format[2] == 'n')) // time64 [nanoseconds]
4321 : {
4322 14 : oFeature.SetField(iOGRFieldIndex,
4323 14 : static_cast<GIntBig>(static_cast<const int64_t *>(
4324 14 : array->buffers[1])[nOffsettedIndex]));
4325 : }
4326 758 : else if (IsTimestampSeconds(format))
4327 : {
4328 0 : ArrowTimestampToOGRDateTime(
4329 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex], 1,
4330 : GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4331 : }
4332 758 : else if (IsTimestampMilliseconds(format))
4333 : {
4334 73 : ArrowTimestampToOGRDateTime(
4335 73 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4336 : 1000, GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4337 : }
4338 685 : else if (IsTimestampMicroseconds(format))
4339 : {
4340 34 : ArrowTimestampToOGRDateTime(
4341 34 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4342 : 1000 * 1000, GetTimestampTimezone(format), oFeature,
4343 : iOGRFieldIndex);
4344 : }
4345 651 : else if (IsTimestampNanoseconds(format))
4346 : {
4347 0 : ArrowTimestampToOGRDateTime(
4348 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4349 : 1000 * 1000 * 1000, GetTimestampTimezone(format), oFeature,
4350 : iOGRFieldIndex);
4351 : }
4352 651 : else if (IsFixedSizeList(format))
4353 : {
4354 154 : const int nItems = GetFixedSizeList(format);
4355 154 : const auto childArray = array->children[0];
4356 154 : const char *childFormat = schema->children[0]->format;
4357 154 : if (IsBoolean(childFormat))
4358 : {
4359 24 : std::vector<int> aValues;
4360 12 : const auto *paValues =
4361 12 : static_cast<const uint8_t *>(childArray->buffers[1]);
4362 36 : for (int i = 0; i < nItems; ++i)
4363 : {
4364 24 : aValues.push_back(
4365 24 : TestBit(paValues,
4366 24 : static_cast<size_t>(childArray->offset +
4367 24 : nOffsettedIndex * nItems + i))
4368 24 : ? 1
4369 : : 0);
4370 : }
4371 12 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4372 12 : aValues.data());
4373 : }
4374 142 : else if (IsInt8(childFormat))
4375 : {
4376 12 : FillFieldFixedSizeList<int8_t, int>(array, iOGRFieldIndex,
4377 : nOffsettedIndex, nItems,
4378 : childArray, oFeature);
4379 : }
4380 130 : else if (IsUInt8(childFormat))
4381 : {
4382 12 : FillFieldFixedSizeList<uint8_t, int>(array, iOGRFieldIndex,
4383 : nOffsettedIndex, nItems,
4384 : childArray, oFeature);
4385 : }
4386 118 : else if (IsInt16(childFormat))
4387 : {
4388 12 : FillFieldFixedSizeList<int16_t, int>(array, iOGRFieldIndex,
4389 : nOffsettedIndex, nItems,
4390 : childArray, oFeature);
4391 : }
4392 106 : else if (IsUInt16(childFormat))
4393 : {
4394 12 : FillFieldFixedSizeList<uint16_t, int>(array, iOGRFieldIndex,
4395 : nOffsettedIndex, nItems,
4396 : childArray, oFeature);
4397 : }
4398 94 : else if (IsInt32(childFormat))
4399 : {
4400 12 : FillFieldFixedSizeList<int32_t, int>(array, iOGRFieldIndex,
4401 : nOffsettedIndex, nItems,
4402 : childArray, oFeature);
4403 : }
4404 82 : else if (IsUInt32(childFormat))
4405 : {
4406 5 : FillFieldFixedSizeList<uint32_t, GIntBig>(array, iOGRFieldIndex,
4407 : nOffsettedIndex, nItems,
4408 : childArray, oFeature);
4409 : }
4410 77 : else if (IsInt64(childFormat))
4411 : {
4412 19 : FillFieldFixedSizeList<int64_t, GIntBig>(array, iOGRFieldIndex,
4413 : nOffsettedIndex, nItems,
4414 : childArray, oFeature);
4415 : }
4416 58 : else if (IsUInt64(childFormat))
4417 : {
4418 12 : FillFieldFixedSizeList<uint64_t, double>(array, iOGRFieldIndex,
4419 : nOffsettedIndex, nItems,
4420 : childArray, oFeature);
4421 : }
4422 46 : else if (IsFloat16(childFormat))
4423 : {
4424 10 : std::vector<double> aValues;
4425 15 : for (int i = 0; i < nItems; ++i)
4426 : {
4427 10 : aValues.push_back(
4428 10 : GetValueFloat16(childArray, nOffsettedIndex * nItems + i));
4429 : }
4430 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4431 5 : aValues.data());
4432 : }
4433 41 : else if (IsFloat32(childFormat))
4434 : {
4435 12 : FillFieldFixedSizeList<float, double>(array, iOGRFieldIndex,
4436 : nOffsettedIndex, nItems,
4437 : childArray, oFeature);
4438 : }
4439 29 : else if (IsFloat64(childFormat))
4440 : {
4441 12 : FillFieldFixedSizeList<double, double>(array, iOGRFieldIndex,
4442 : nOffsettedIndex, nItems,
4443 : childArray, oFeature);
4444 : }
4445 17 : else if (IsString(childFormat))
4446 : {
4447 12 : FillFieldFixedSizeListString<uint32_t>(array, iOGRFieldIndex,
4448 : nOffsettedIndex, nItems,
4449 : childArray, oFeature);
4450 : }
4451 5 : else if (IsLargeString(childFormat))
4452 : {
4453 5 : FillFieldFixedSizeListString<uint64_t>(array, iOGRFieldIndex,
4454 : nOffsettedIndex, nItems,
4455 : childArray, oFeature);
4456 : }
4457 : }
4458 497 : else if (IsList(format) || IsLargeList(format))
4459 : {
4460 254 : const auto childArray = array->children[0];
4461 254 : const char *childFormat = schema->children[0]->format;
4462 254 : if (IsBoolean(childFormat))
4463 : {
4464 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4465 12 : FillFieldListFromBool<uint32_t>(array, iOGRFieldIndex,
4466 : nOffsettedIndex, childArray,
4467 : oFeature);
4468 : else
4469 4 : FillFieldListFromBool<uint64_t>(array, iOGRFieldIndex,
4470 : nOffsettedIndex, childArray,
4471 : oFeature);
4472 : }
4473 238 : else if (IsInt8(childFormat))
4474 : {
4475 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4476 10 : FillFieldList<uint32_t, int8_t, int>(array, iOGRFieldIndex,
4477 : nOffsettedIndex,
4478 : childArray, oFeature);
4479 : else
4480 4 : FillFieldList<uint64_t, int8_t, int>(array, iOGRFieldIndex,
4481 : nOffsettedIndex,
4482 : childArray, oFeature);
4483 : }
4484 224 : else if (IsUInt8(childFormat))
4485 : {
4486 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4487 14 : FillFieldList<uint32_t, uint8_t, int>(array, iOGRFieldIndex,
4488 : nOffsettedIndex,
4489 : childArray, oFeature);
4490 : else
4491 4 : FillFieldList<uint64_t, uint8_t, int>(array, iOGRFieldIndex,
4492 : nOffsettedIndex,
4493 : childArray, oFeature);
4494 : }
4495 206 : else if (IsInt16(childFormat))
4496 : {
4497 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4498 12 : FillFieldList<uint32_t, int16_t, int>(array, iOGRFieldIndex,
4499 : nOffsettedIndex,
4500 : childArray, oFeature);
4501 : else
4502 4 : FillFieldList<uint64_t, int16_t, int>(array, iOGRFieldIndex,
4503 : nOffsettedIndex,
4504 : childArray, oFeature);
4505 : }
4506 190 : else if (IsUInt16(childFormat))
4507 : {
4508 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4509 10 : FillFieldList<uint32_t, uint16_t, int>(array, iOGRFieldIndex,
4510 : nOffsettedIndex,
4511 : childArray, oFeature);
4512 : else
4513 4 : FillFieldList<uint64_t, uint16_t, int>(array, iOGRFieldIndex,
4514 : nOffsettedIndex,
4515 : childArray, oFeature);
4516 : }
4517 176 : else if (IsInt32(childFormat))
4518 : {
4519 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4520 14 : FillFieldList<uint32_t, int32_t, int>(array, iOGRFieldIndex,
4521 : nOffsettedIndex,
4522 : childArray, oFeature);
4523 : else
4524 4 : FillFieldList<uint64_t, int32_t, int>(array, iOGRFieldIndex,
4525 : nOffsettedIndex,
4526 : childArray, oFeature);
4527 : }
4528 158 : else if (IsUInt32(childFormat))
4529 : {
4530 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
4531 4 : FillFieldList<uint32_t, uint32_t, GIntBig>(
4532 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4533 : oFeature);
4534 : else
4535 4 : FillFieldList<uint64_t, uint32_t, GIntBig>(
4536 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4537 : oFeature);
4538 : }
4539 150 : else if (IsInt64(childFormat))
4540 : {
4541 31 : if (format[1] == ARROW_2ND_LETTER_LIST)
4542 27 : FillFieldList<uint32_t, int64_t, GIntBig>(array, iOGRFieldIndex,
4543 : nOffsettedIndex,
4544 : childArray, oFeature);
4545 : else
4546 4 : FillFieldList<uint64_t, int64_t, GIntBig>(array, iOGRFieldIndex,
4547 : nOffsettedIndex,
4548 : childArray, oFeature);
4549 : }
4550 119 : else if (IsUInt64(childFormat)) // (lossy conversion)
4551 : {
4552 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4553 10 : FillFieldList<uint32_t, uint64_t, double>(array, iOGRFieldIndex,
4554 : nOffsettedIndex,
4555 : childArray, oFeature);
4556 : else
4557 4 : FillFieldList<uint64_t, uint64_t, double>(array, iOGRFieldIndex,
4558 : nOffsettedIndex,
4559 : childArray, oFeature);
4560 : }
4561 105 : else if (IsFloat16(childFormat))
4562 : {
4563 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
4564 4 : FillFieldListFromHalfFloat<uint32_t>(array, iOGRFieldIndex,
4565 : nOffsettedIndex,
4566 : childArray, oFeature);
4567 : else
4568 4 : FillFieldListFromHalfFloat<uint64_t>(array, iOGRFieldIndex,
4569 : nOffsettedIndex,
4570 : childArray, oFeature);
4571 : }
4572 97 : else if (IsFloat32(childFormat))
4573 : {
4574 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4575 12 : FillFieldList<uint32_t, float, double>(array, iOGRFieldIndex,
4576 : nOffsettedIndex,
4577 : childArray, oFeature);
4578 : else
4579 4 : FillFieldList<uint64_t, float, double>(array, iOGRFieldIndex,
4580 : nOffsettedIndex,
4581 : childArray, oFeature);
4582 : }
4583 81 : else if (IsFloat64(childFormat))
4584 : {
4585 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4586 14 : FillFieldList<uint32_t, double, double>(array, iOGRFieldIndex,
4587 : nOffsettedIndex,
4588 : childArray, oFeature);
4589 : else
4590 4 : FillFieldList<uint64_t, double, double>(array, iOGRFieldIndex,
4591 : nOffsettedIndex,
4592 : childArray, oFeature);
4593 : }
4594 63 : else if (IsString(childFormat))
4595 : {
4596 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4597 14 : FillFieldListFromString<uint32_t, uint32_t>(
4598 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4599 : oFeature);
4600 : else
4601 4 : FillFieldListFromString<uint64_t, uint32_t>(
4602 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4603 : oFeature);
4604 : }
4605 45 : else if (IsLargeString(childFormat))
4606 : {
4607 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4608 10 : FillFieldListFromString<uint32_t, uint64_t>(
4609 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4610 : oFeature);
4611 : else
4612 4 : FillFieldListFromString<uint64_t, uint64_t>(
4613 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4614 : oFeature);
4615 : }
4616 31 : else if (format[1] == ARROW_2ND_LETTER_LIST)
4617 : {
4618 31 : const size_t iFeature =
4619 31 : static_cast<size_t>(nOffsettedIndex - array->offset);
4620 31 : oFeature.SetField(iOGRFieldIndex,
4621 62 : GetListAsJSON<uint32_t>(schema, array, iFeature)
4622 62 : .Format(CPLJSONObject::PrettyFormat::Plain)
4623 : .c_str());
4624 : }
4625 : else
4626 : {
4627 0 : const size_t iFeature =
4628 0 : static_cast<size_t>(nOffsettedIndex - array->offset);
4629 0 : oFeature.SetField(iOGRFieldIndex,
4630 0 : GetListAsJSON<uint64_t>(schema, array, iFeature)
4631 0 : .Format(CPLJSONObject::PrettyFormat::Plain)
4632 : .c_str());
4633 : }
4634 : }
4635 243 : else if (IsDecimal(format))
4636 : {
4637 4 : int nPrecision = 0;
4638 4 : int nScale = 0;
4639 4 : int nWidthInBytes = 0;
4640 4 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
4641 : {
4642 0 : CPLAssert(false);
4643 : }
4644 :
4645 : // fits on a int64
4646 4 : CPLAssert(nPrecision <= 19);
4647 : // either 128 or 256 bits
4648 4 : CPLAssert((nWidthInBytes % 8) == 0);
4649 4 : const int nWidthIn64BitWord = nWidthInBytes / 8;
4650 4 : const size_t iFeature =
4651 4 : static_cast<size_t>(nOffsettedIndex - array->offset);
4652 4 : oFeature.SetField(
4653 : iOGRFieldIndex,
4654 : GetValueDecimal(array, nWidthIn64BitWord, nScale, iFeature));
4655 4 : return true;
4656 : }
4657 239 : else if (IsMap(format))
4658 : {
4659 239 : const size_t iFeature =
4660 239 : static_cast<size_t>(nOffsettedIndex - array->offset);
4661 239 : oFeature.SetField(iOGRFieldIndex,
4662 478 : GetMapAsJSON(schema, array, iFeature)
4663 478 : .Format(CPLJSONObject::PrettyFormat::Plain)
4664 : .c_str());
4665 : }
4666 : else
4667 : {
4668 0 : return false;
4669 : }
4670 :
4671 819 : return true;
4672 : }
4673 :
4674 : /************************************************************************/
4675 : /* FillValidityArrayFromAttrQuery() */
4676 : /************************************************************************/
4677 :
4678 134 : static size_t FillValidityArrayFromAttrQuery(
4679 : const OGRLayer *poLayer, OGRFeatureQuery *poAttrQuery,
4680 : const struct ArrowSchema *schema, struct ArrowArray *array,
4681 : std::vector<bool> &abyValidityFromFilters, CSLConstList papszOptions)
4682 : {
4683 134 : size_t nCountIntersecting = 0;
4684 134 : auto poFeatureDefn = const_cast<OGRLayer *>(poLayer)->GetLayerDefn();
4685 268 : OGRFeature oFeature(poFeatureDefn);
4686 :
4687 268 : std::map<std::string, std::vector<int>> oMapFieldNameToArrowPath;
4688 268 : std::vector<int> anArrowPathTmp;
4689 134 : BuildMapFieldNameToArrowPath(schema, oMapFieldNameToArrowPath,
4690 268 : std::string(), anArrowPathTmp);
4691 :
4692 : struct UsedFieldsInfo
4693 : {
4694 : int iOGRFieldIndex{};
4695 : std::vector<int> anArrowPath{};
4696 : };
4697 :
4698 268 : std::vector<UsedFieldsInfo> aoUsedFieldsInfo;
4699 :
4700 134 : bool bNeedsFID = false;
4701 268 : const CPLStringList aosUsedFields(poAttrQuery->GetUsedFields());
4702 252 : for (int i = 0; i < aosUsedFields.size(); ++i)
4703 : {
4704 118 : int iOGRFieldIndex = poFeatureDefn->GetFieldIndex(aosUsedFields[i]);
4705 118 : if (iOGRFieldIndex >= 0)
4706 : {
4707 112 : const auto oIter = oMapFieldNameToArrowPath.find(aosUsedFields[i]);
4708 112 : if (oIter != oMapFieldNameToArrowPath.end())
4709 : {
4710 224 : UsedFieldsInfo info;
4711 112 : info.iOGRFieldIndex = iOGRFieldIndex;
4712 112 : info.anArrowPath = oIter->second;
4713 112 : aoUsedFieldsInfo.push_back(info);
4714 : }
4715 : else
4716 : {
4717 0 : CPLError(CE_Failure, CPLE_AppDefined,
4718 : "Cannot find %s in oMapFieldNameToArrowPath",
4719 : aosUsedFields[i]);
4720 : }
4721 : }
4722 6 : else if (EQUAL(aosUsedFields[i], "FID"))
4723 : {
4724 6 : bNeedsFID = true;
4725 : }
4726 : else
4727 : {
4728 0 : CPLDebug("OGR", "Cannot find used field %s", aosUsedFields[i]);
4729 : }
4730 : }
4731 :
4732 134 : const size_t nLength = abyValidityFromFilters.size();
4733 :
4734 134 : GIntBig nBaseSeqFID = -1;
4735 268 : std::vector<int> anArrowPathToFIDColumn;
4736 134 : if (bNeedsFID)
4737 : {
4738 : // BASE_SEQUENTIAL_FID is set when there is no Arrow column for the FID
4739 : // and we assume sequential FID numbering
4740 : const char *pszBaseSeqFID =
4741 6 : CSLFetchNameValue(papszOptions, "BASE_SEQUENTIAL_FID");
4742 6 : if (pszBaseSeqFID)
4743 : {
4744 5 : nBaseSeqFID = CPLAtoGIntBig(pszBaseSeqFID);
4745 :
4746 : // Optimizimation for "FID = constant"
4747 : swq_expr_node *poNode =
4748 5 : static_cast<swq_expr_node *>(poAttrQuery->GetSWQExpr());
4749 15 : if (poNode->eNodeType == SNT_OPERATION &&
4750 5 : poNode->nOperation == SWQ_EQ && poNode->nSubExprCount == 2 &&
4751 2 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
4752 2 : poNode->papoSubExpr[1]->eNodeType == SNT_CONSTANT &&
4753 2 : poNode->papoSubExpr[0]->field_index ==
4754 12 : poFeatureDefn->GetFieldCount() + SPF_FID &&
4755 2 : poNode->papoSubExpr[1]->field_type == SWQ_INTEGER64)
4756 : {
4757 2 : if (nBaseSeqFID + static_cast<int64_t>(nLength) <
4758 2 : poNode->papoSubExpr[1]->int_value ||
4759 2 : nBaseSeqFID > poNode->papoSubExpr[1]->int_value)
4760 : {
4761 0 : return 0;
4762 : }
4763 : }
4764 : }
4765 : else
4766 : {
4767 : const char *pszFIDColumn =
4768 1 : const_cast<OGRLayer *>(poLayer)->GetFIDColumn();
4769 1 : if (pszFIDColumn && pszFIDColumn[0])
4770 : {
4771 1 : const auto oIter = oMapFieldNameToArrowPath.find(pszFIDColumn);
4772 1 : if (oIter != oMapFieldNameToArrowPath.end())
4773 : {
4774 1 : anArrowPathToFIDColumn = oIter->second;
4775 : }
4776 : }
4777 1 : if (anArrowPathToFIDColumn.empty())
4778 : {
4779 0 : CPLError(CE_Failure, CPLE_AppDefined,
4780 : "Filtering on FID requested but cannot associate a "
4781 : "FID with Arrow records");
4782 : }
4783 : }
4784 : }
4785 :
4786 555 : for (size_t iRow = 0; iRow < nLength; ++iRow)
4787 : {
4788 421 : if (!abyValidityFromFilters[iRow])
4789 2 : continue;
4790 :
4791 419 : if (bNeedsFID)
4792 : {
4793 21 : if (nBaseSeqFID >= 0)
4794 : {
4795 11 : oFeature.SetFID(nBaseSeqFID + iRow);
4796 : }
4797 10 : else if (!anArrowPathToFIDColumn.empty())
4798 : {
4799 10 : oFeature.SetFID(OGRNullFID);
4800 :
4801 10 : const struct ArrowSchema *psSchemaField = schema;
4802 10 : const struct ArrowArray *psArray = array;
4803 10 : bool bSkip = false;
4804 20 : for (size_t i = 0; i < anArrowPathToFIDColumn.size(); ++i)
4805 : {
4806 10 : const int iChild = anArrowPathToFIDColumn[i];
4807 10 : if (i > 0)
4808 : {
4809 0 : const uint8_t *pabyValidity =
4810 0 : psArray->null_count == 0
4811 0 : ? nullptr
4812 : : static_cast<uint8_t *>(
4813 0 : const_cast<void *>(psArray->buffers[0]));
4814 0 : const size_t nOffsettedIndex =
4815 0 : static_cast<size_t>(iRow + psArray->offset);
4816 0 : if (pabyValidity &&
4817 0 : !TestBit(pabyValidity, nOffsettedIndex))
4818 : {
4819 0 : bSkip = true;
4820 0 : break;
4821 : }
4822 : }
4823 :
4824 10 : psSchemaField = psSchemaField->children[iChild];
4825 10 : psArray = psArray->children[iChild];
4826 : }
4827 10 : if (bSkip)
4828 0 : continue;
4829 :
4830 10 : const char *format = psSchemaField->format;
4831 10 : const uint8_t *pabyValidity =
4832 10 : psArray->null_count == 0
4833 10 : ? nullptr
4834 : : static_cast<uint8_t *>(
4835 0 : const_cast<void *>(psArray->buffers[0]));
4836 10 : const size_t nOffsettedIndex =
4837 10 : static_cast<size_t>(iRow + psArray->offset);
4838 10 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
4839 : {
4840 : // do nothing
4841 : }
4842 10 : else if (IsInt32(format))
4843 : {
4844 0 : oFeature.SetFID(static_cast<const int32_t *>(
4845 0 : psArray->buffers[1])[nOffsettedIndex]);
4846 : }
4847 10 : else if (IsInt64(format))
4848 : {
4849 10 : oFeature.SetFID(static_cast<const int64_t *>(
4850 10 : psArray->buffers[1])[nOffsettedIndex]);
4851 : }
4852 : }
4853 : }
4854 :
4855 725 : for (const auto &sInfo : aoUsedFieldsInfo)
4856 : {
4857 306 : const int iOGRFieldIndex = sInfo.iOGRFieldIndex;
4858 306 : const struct ArrowSchema *psSchemaField = schema;
4859 306 : const struct ArrowArray *psArray = array;
4860 306 : bool bSkip = false;
4861 612 : for (size_t i = 0; i < sInfo.anArrowPath.size(); ++i)
4862 : {
4863 306 : const int iChild = sInfo.anArrowPath[i];
4864 306 : if (i > 0)
4865 : {
4866 0 : const uint8_t *pabyValidity =
4867 0 : psArray->null_count == 0
4868 0 : ? nullptr
4869 : : static_cast<uint8_t *>(
4870 0 : const_cast<void *>(psArray->buffers[0]));
4871 0 : const size_t nOffsettedIndex =
4872 0 : static_cast<size_t>(iRow + psArray->offset);
4873 0 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
4874 : {
4875 0 : bSkip = true;
4876 0 : oFeature.SetFieldNull(iOGRFieldIndex);
4877 0 : break;
4878 : }
4879 : }
4880 :
4881 306 : psSchemaField = psSchemaField->children[iChild];
4882 306 : psArray = psArray->children[iChild];
4883 : }
4884 306 : if (bSkip)
4885 0 : continue;
4886 :
4887 306 : const char *format = psSchemaField->format;
4888 306 : const uint8_t *pabyValidity =
4889 306 : psArray->null_count == 0
4890 306 : ? nullptr
4891 : : static_cast<uint8_t *>(
4892 129 : const_cast<void *>(psArray->buffers[0]));
4893 306 : const size_t nOffsettedIndex =
4894 306 : static_cast<size_t>(iRow + psArray->offset);
4895 306 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
4896 : {
4897 38 : oFeature.SetFieldNull(iOGRFieldIndex);
4898 : }
4899 268 : else if (IsBoolean(format))
4900 : {
4901 78 : oFeature.SetField(
4902 : iOGRFieldIndex,
4903 78 : TestBit(static_cast<const uint8_t *>(psArray->buffers[1]),
4904 : nOffsettedIndex));
4905 : }
4906 190 : else if (IsInt8(format))
4907 : {
4908 8 : oFeature.SetField(iOGRFieldIndex,
4909 8 : static_cast<const int8_t *>(
4910 8 : psArray->buffers[1])[nOffsettedIndex]);
4911 : }
4912 182 : else if (IsUInt8(format))
4913 : {
4914 4 : oFeature.SetField(iOGRFieldIndex,
4915 4 : static_cast<const uint8_t *>(
4916 4 : psArray->buffers[1])[nOffsettedIndex]);
4917 : }
4918 178 : else if (IsInt16(format))
4919 : {
4920 16 : oFeature.SetField(iOGRFieldIndex,
4921 16 : static_cast<const int16_t *>(
4922 16 : psArray->buffers[1])[nOffsettedIndex]);
4923 : }
4924 162 : else if (IsUInt16(format))
4925 : {
4926 2 : oFeature.SetField(iOGRFieldIndex,
4927 2 : static_cast<const uint16_t *>(
4928 2 : psArray->buffers[1])[nOffsettedIndex]);
4929 : }
4930 160 : else if (IsInt32(format))
4931 : {
4932 10 : oFeature.SetField(iOGRFieldIndex,
4933 10 : static_cast<const int32_t *>(
4934 10 : psArray->buffers[1])[nOffsettedIndex]);
4935 : }
4936 150 : else if (IsUInt32(format))
4937 : {
4938 0 : oFeature.SetField(
4939 : iOGRFieldIndex,
4940 0 : static_cast<GIntBig>(static_cast<const uint32_t *>(
4941 0 : psArray->buffers[1])[nOffsettedIndex]));
4942 : }
4943 150 : else if (IsInt64(format))
4944 : {
4945 4 : oFeature.SetField(
4946 : iOGRFieldIndex,
4947 4 : static_cast<GIntBig>(static_cast<const int64_t *>(
4948 4 : psArray->buffers[1])[nOffsettedIndex]));
4949 : }
4950 146 : else if (IsUInt64(format))
4951 : {
4952 4 : oFeature.SetField(
4953 : iOGRFieldIndex,
4954 4 : static_cast<double>(static_cast<const uint64_t *>(
4955 4 : psArray->buffers[1])[nOffsettedIndex]));
4956 : }
4957 142 : else if (IsFloat32(format))
4958 : {
4959 2 : oFeature.SetField(iOGRFieldIndex,
4960 2 : static_cast<const float *>(
4961 2 : psArray->buffers[1])[nOffsettedIndex]);
4962 : }
4963 140 : else if (IsFloat64(format))
4964 : {
4965 26 : oFeature.SetField(iOGRFieldIndex,
4966 26 : static_cast<const double *>(
4967 26 : psArray->buffers[1])[nOffsettedIndex]);
4968 : }
4969 114 : else if (IsString(format))
4970 : {
4971 18 : const auto nOffset = static_cast<const uint32_t *>(
4972 18 : psArray->buffers[1])[nOffsettedIndex];
4973 18 : const auto nNextOffset = static_cast<const uint32_t *>(
4974 18 : psArray->buffers[1])[nOffsettedIndex + 1];
4975 18 : const GByte *pabyData =
4976 18 : static_cast<const GByte *>(psArray->buffers[2]);
4977 18 : const uint32_t nSize = nNextOffset - nOffset;
4978 18 : CPLAssert(oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() ==
4979 : OFTString);
4980 18 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
4981 18 : memcpy(pszStr, pabyData + nOffset, nSize);
4982 18 : pszStr[nSize] = 0;
4983 18 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
4984 18 : if (IsValidField(psField))
4985 12 : CPLFree(psField->String);
4986 18 : psField->String = pszStr;
4987 : }
4988 96 : else if (IsLargeString(format))
4989 : {
4990 6 : const auto nOffset = static_cast<const uint64_t *>(
4991 6 : psArray->buffers[1])[nOffsettedIndex];
4992 6 : const auto nNextOffset = static_cast<const uint64_t *>(
4993 6 : psArray->buffers[1])[nOffsettedIndex + 1];
4994 6 : const GByte *pabyData =
4995 6 : static_cast<const GByte *>(psArray->buffers[2]);
4996 6 : const size_t nSize = static_cast<size_t>(nNextOffset - nOffset);
4997 6 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
4998 6 : memcpy(pszStr, pabyData + static_cast<size_t>(nOffset), nSize);
4999 6 : pszStr[nSize] = 0;
5000 6 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5001 6 : if (IsValidField(psField))
5002 3 : CPLFree(psField->String);
5003 6 : psField->String = pszStr;
5004 : }
5005 90 : else if (IsBinary(format))
5006 : {
5007 5 : const auto nOffset = static_cast<const uint32_t *>(
5008 5 : psArray->buffers[1])[nOffsettedIndex];
5009 5 : const auto nNextOffset = static_cast<const uint32_t *>(
5010 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5011 5 : const GByte *pabyData =
5012 5 : static_cast<const GByte *>(psArray->buffers[2]);
5013 5 : const uint32_t nSize = nNextOffset - nOffset;
5014 10 : if (nSize >
5015 5 : static_cast<size_t>(std::numeric_limits<int32_t>::max()))
5016 : {
5017 0 : abyValidityFromFilters.clear();
5018 0 : abyValidityFromFilters.resize(nLength);
5019 0 : CPLError(CE_Failure, CPLE_AppDefined,
5020 : "Unexpected error in PostFilterArrowArray(): too "
5021 : "large binary");
5022 0 : return 0;
5023 : }
5024 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5025 5 : pabyData + nOffset);
5026 : }
5027 85 : else if (IsLargeBinary(format))
5028 : {
5029 5 : const auto nOffset = static_cast<const uint64_t *>(
5030 5 : psArray->buffers[1])[nOffsettedIndex];
5031 5 : const auto nNextOffset = static_cast<const uint64_t *>(
5032 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5033 5 : const GByte *pabyData =
5034 5 : static_cast<const GByte *>(psArray->buffers[2]);
5035 5 : const uint64_t nSize = nNextOffset - nOffset;
5036 5 : if (nSize >
5037 5 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
5038 : {
5039 0 : abyValidityFromFilters.clear();
5040 0 : abyValidityFromFilters.resize(nLength);
5041 0 : CPLError(CE_Failure, CPLE_AppDefined,
5042 : "Unexpected error in PostFilterArrowArray(): too "
5043 : "large binary");
5044 0 : return 0;
5045 : }
5046 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5047 5 : pabyData + nOffset);
5048 : }
5049 80 : else if (!SetFieldForOtherFormats(oFeature, iOGRFieldIndex,
5050 : nOffsettedIndex, psSchemaField,
5051 : psArray))
5052 : {
5053 0 : abyValidityFromFilters.clear();
5054 0 : abyValidityFromFilters.resize(nLength);
5055 0 : CPLError(
5056 : CE_Failure, CPLE_AppDefined,
5057 : "Unexpected error in PostFilterArrowArray(): unhandled "
5058 : "field format: %s",
5059 : format);
5060 0 : return 0;
5061 : }
5062 : }
5063 419 : if (poAttrQuery->Evaluate(&oFeature))
5064 : {
5065 215 : nCountIntersecting++;
5066 : }
5067 : else
5068 : {
5069 204 : abyValidityFromFilters[iRow] = false;
5070 : }
5071 : }
5072 134 : return nCountIntersecting;
5073 : }
5074 :
5075 : /************************************************************************/
5076 : /* OGRLayer::PostFilterArrowArray() */
5077 : /************************************************************************/
5078 :
5079 : /** Remove rows that aren't selected by the spatial or attribute filter.
5080 : *
5081 : * Assumes that CanPostFilterArrowArray() has been called and returned true.
5082 : */
5083 153 : void OGRLayer::PostFilterArrowArray(const struct ArrowSchema *schema,
5084 : struct ArrowArray *array,
5085 : CSLConstList papszOptions) const
5086 : {
5087 153 : if (!m_poFilterGeom && !m_poAttrQuery)
5088 43 : return;
5089 :
5090 153 : CPLAssert(schema->n_children == array->n_children);
5091 :
5092 153 : int64_t iGeomField = -1;
5093 153 : if (m_poFilterGeom)
5094 : {
5095 : const char *pszGeomFieldName =
5096 : const_cast<OGRLayer *>(this)
5097 21 : ->GetLayerDefn()
5098 21 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
5099 21 : ->GetNameRef();
5100 837 : for (int64_t iField = 0; iField < schema->n_children; ++iField)
5101 : {
5102 837 : const auto fieldSchema = schema->children[iField];
5103 837 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
5104 : {
5105 21 : iGeomField = iField;
5106 21 : break;
5107 : }
5108 816 : CPLAssert(array->children[iField]->length ==
5109 : array->children[0]->length);
5110 : }
5111 : // Guaranteed if CanPostFilterArrowArray() returned true
5112 21 : CPLAssert(iGeomField >= 0);
5113 21 : CPLAssert(IsBinary(schema->children[iGeomField]->format) ||
5114 : IsLargeBinary(schema->children[iGeomField]->format));
5115 21 : CPLAssert(array->children[iGeomField]->n_buffers == 3);
5116 : }
5117 :
5118 153 : std::vector<bool> abyValidityFromFilters;
5119 153 : const size_t nLength = static_cast<size_t>(array->length);
5120 : const size_t nCountIntersectingGeom =
5121 174 : m_poFilterGeom ? (IsBinary(schema->children[iGeomField]->format)
5122 42 : ? FillValidityArrayFromWKBArray<uint32_t>(
5123 21 : array->children[iGeomField], this,
5124 : abyValidityFromFilters)
5125 0 : : FillValidityArrayFromWKBArray<uint64_t>(
5126 0 : array->children[iGeomField], this,
5127 : abyValidityFromFilters))
5128 153 : : nLength;
5129 153 : if (!m_poFilterGeom)
5130 132 : abyValidityFromFilters.resize(nLength, true);
5131 : const size_t nCountIntersecting =
5132 134 : m_poAttrQuery && nCountIntersectingGeom > 0
5133 306 : ? FillValidityArrayFromAttrQuery(this, m_poAttrQuery, schema, array,
5134 : abyValidityFromFilters,
5135 : papszOptions)
5136 19 : : m_poFilterGeom ? nCountIntersectingGeom
5137 153 : : nLength;
5138 : // Nothing to do ?
5139 153 : if (nCountIntersecting == nLength)
5140 : {
5141 : // CPLDebug("OGR", "All rows match filter");
5142 43 : return;
5143 : }
5144 :
5145 110 : if (nCountIntersecting == 0)
5146 : {
5147 27 : array->length = 0;
5148 : }
5149 83 : else if (!CompactStructArray(schema, array, 0, abyValidityFromFilters,
5150 : nCountIntersecting))
5151 : {
5152 0 : array->release(array);
5153 0 : memset(array, 0, sizeof(*array));
5154 : }
5155 : }
5156 :
5157 : /************************************************************************/
5158 : /* OGRCloneArrowArray */
5159 : /************************************************************************/
5160 :
5161 13947 : static bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5162 : const struct ArrowArray *src_array,
5163 : struct ArrowArray *out_array,
5164 : size_t nParentOffset)
5165 : {
5166 13947 : memset(out_array, 0, sizeof(*out_array));
5167 13947 : const size_t nLength =
5168 13947 : static_cast<size_t>(src_array->length) - nParentOffset;
5169 13947 : out_array->length = nLength;
5170 13947 : out_array->null_count = src_array->null_count;
5171 13947 : out_array->release = OGRLayerDefaultReleaseArray;
5172 :
5173 13947 : bool bRet = true;
5174 :
5175 13947 : out_array->n_buffers = src_array->n_buffers;
5176 27894 : out_array->buffers = static_cast<const void **>(CPLCalloc(
5177 13947 : static_cast<size_t>(src_array->n_buffers), sizeof(const void *)));
5178 13947 : CPLAssert(static_cast<size_t>(src_array->length) >= nParentOffset);
5179 13947 : const char *format = schema->format;
5180 13947 : const auto nOffset = static_cast<size_t>(src_array->offset) + nParentOffset;
5181 41479 : for (int64_t i = 0; i < src_array->n_buffers; ++i)
5182 : {
5183 27532 : if (i == 0 || IsBoolean(format))
5184 : {
5185 14318 : if (i == 1)
5186 : {
5187 371 : CPLAssert(src_array->buffers[i]);
5188 : }
5189 14318 : if (src_array->buffers[i])
5190 : {
5191 8765 : const size_t nBytes = nLength ? (nLength + 7) / 8 : 1;
5192 : uint8_t *CPL_RESTRICT p = static_cast<uint8_t *>(
5193 8765 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nBytes));
5194 8765 : if (!p)
5195 : {
5196 0 : bRet = false;
5197 0 : break;
5198 : }
5199 8765 : const auto *CPL_RESTRICT pSrcArray =
5200 8765 : static_cast<const uint8_t *>(src_array->buffers[i]);
5201 8765 : if ((nOffset % 8) != 0)
5202 : {
5203 : // Make sure last byte is fully initialized
5204 2281 : p[nBytes - 1] = 0;
5205 7359 : for (size_t iRow = 0; iRow < nLength; ++iRow)
5206 : {
5207 5078 : if (TestBit(pSrcArray, nOffset + iRow))
5208 4949 : SetBit(p, iRow);
5209 : else
5210 129 : UnsetBit(p, iRow);
5211 : }
5212 : }
5213 : else
5214 : {
5215 6484 : memcpy(p, pSrcArray + nOffset / 8, nBytes);
5216 : }
5217 8765 : out_array->buffers[i] = p;
5218 : }
5219 : }
5220 13214 : else if (i == 1)
5221 : {
5222 11083 : CPLAssert(src_array->buffers[i]);
5223 11083 : size_t nEltSize = 0;
5224 11083 : size_t nExtraElt = 0;
5225 11083 : if (IsUInt8(format) || IsInt8(format))
5226 742 : nEltSize = sizeof(uint8_t);
5227 10341 : else if (IsUInt16(format) || IsInt16(format) || IsFloat16(format))
5228 762 : nEltSize = sizeof(uint16_t);
5229 19138 : else if (IsUInt32(format) || IsInt32(format) || IsFloat32(format) ||
5230 27618 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
5231 8480 : strcmp(format, "ttm") == 0)
5232 : {
5233 1316 : nEltSize = sizeof(uint32_t);
5234 : }
5235 12719 : else if (IsString(format) || IsBinary(format) || IsList(format) ||
5236 4456 : IsMap(format))
5237 : {
5238 4496 : nEltSize = sizeof(uint32_t);
5239 4496 : nExtraElt = 1;
5240 : }
5241 7163 : else if (IsUInt64(format) || IsInt64(format) || IsFloat64(format) ||
5242 1648 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
5243 7163 : strcmp(format, "ttn") == 0 || IsTimestamp(format))
5244 : {
5245 2939 : nEltSize = sizeof(uint64_t);
5246 : }
5247 1318 : else if (IsLargeString(format) || IsLargeBinary(format) ||
5248 490 : IsLargeList(format))
5249 : {
5250 343 : nEltSize = sizeof(uint64_t);
5251 343 : nExtraElt = 1;
5252 : }
5253 485 : else if (IsFixedWidthBinary(format))
5254 : {
5255 111 : nEltSize = GetFixedWithBinary(format);
5256 : }
5257 374 : else if (IsDecimal(format))
5258 : {
5259 374 : int nPrecision = 0;
5260 374 : int nScale = 0;
5261 374 : int nWidthInBytes = 0;
5262 374 : if (!ParseDecimalFormat(format, nPrecision, nScale,
5263 : nWidthInBytes))
5264 : {
5265 0 : CPLError(
5266 : CE_Failure, CPLE_AppDefined,
5267 : "Unexpected error in OGRCloneArrowArray(): unhandled "
5268 : "field format: %s",
5269 : format);
5270 :
5271 0 : return false;
5272 : }
5273 374 : nEltSize = nWidthInBytes;
5274 : }
5275 11083 : if (nEltSize)
5276 : {
5277 11083 : void *p = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
5278 : nLength ? nEltSize * (nLength + nExtraElt) : 1);
5279 11083 : if (!p)
5280 : {
5281 0 : bRet = false;
5282 0 : break;
5283 : }
5284 11083 : if (nLength)
5285 : {
5286 12876 : if ((IsString(format) || IsBinary(format)) &&
5287 1793 : static_cast<const uint32_t *>(
5288 1793 : src_array->buffers[1])[nOffset] != 0)
5289 : {
5290 258 : const auto *CPL_RESTRICT pSrcOffsets =
5291 258 : static_cast<const uint32_t *>(
5292 258 : src_array->buffers[1]) +
5293 : nOffset;
5294 258 : const auto nShiftOffset = pSrcOffsets[0];
5295 258 : auto *CPL_RESTRICT pDstOffsets =
5296 : static_cast<uint32_t *>(p);
5297 1118 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5298 : {
5299 860 : pDstOffsets[iRow] =
5300 860 : pSrcOffsets[iRow] - nShiftOffset;
5301 : }
5302 : }
5303 11163 : else if ((IsLargeString(format) || IsLargeBinary(format)) &&
5304 338 : static_cast<const uint64_t *>(
5305 338 : src_array->buffers[1])[nOffset] != 0)
5306 : {
5307 86 : const auto *CPL_RESTRICT pSrcOffsets =
5308 86 : static_cast<const uint64_t *>(
5309 86 : src_array->buffers[1]) +
5310 : nOffset;
5311 86 : const auto nShiftOffset = pSrcOffsets[0];
5312 86 : auto *CPL_RESTRICT pDstOffsets =
5313 : static_cast<uint64_t *>(p);
5314 344 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5315 : {
5316 258 : pDstOffsets[iRow] =
5317 258 : pSrcOffsets[iRow] - nShiftOffset;
5318 : }
5319 : }
5320 : else
5321 : {
5322 10739 : memcpy(
5323 : p,
5324 10739 : static_cast<const GByte *>(src_array->buffers[i]) +
5325 10739 : nEltSize * nOffset,
5326 10739 : nEltSize * (nLength + nExtraElt));
5327 : }
5328 : }
5329 11083 : out_array->buffers[i] = p;
5330 : }
5331 : else
5332 : {
5333 0 : CPLError(CE_Failure, CPLE_AppDefined,
5334 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5335 : "format = '%s', i = 1",
5336 0 : schema->name, format);
5337 0 : bRet = false;
5338 0 : break;
5339 : }
5340 : }
5341 2131 : else if (i == 2)
5342 : {
5343 2131 : CPLAssert(src_array->buffers[i]);
5344 2131 : size_t nSrcCharOffset = 0;
5345 2131 : size_t nCharCount = 0;
5346 2131 : if (IsString(format) || IsBinary(format))
5347 : {
5348 1793 : const auto *pSrcOffsets =
5349 1793 : static_cast<const uint32_t *>(src_array->buffers[1]) +
5350 : nOffset;
5351 1793 : nSrcCharOffset = pSrcOffsets[0];
5352 1793 : nCharCount = pSrcOffsets[nLength] - pSrcOffsets[0];
5353 : }
5354 338 : else if (IsLargeString(format) || IsLargeBinary(format))
5355 : {
5356 338 : const auto *pSrcOffsets =
5357 338 : static_cast<const uint64_t *>(src_array->buffers[1]) +
5358 : nOffset;
5359 338 : nSrcCharOffset = static_cast<size_t>(pSrcOffsets[0]);
5360 338 : nCharCount =
5361 338 : static_cast<size_t>(pSrcOffsets[nLength] - pSrcOffsets[0]);
5362 : }
5363 : else
5364 : {
5365 0 : CPLError(CE_Failure, CPLE_AppDefined,
5366 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5367 : "format = '%s', i = 2",
5368 0 : schema->name, format);
5369 0 : bRet = false;
5370 0 : break;
5371 : }
5372 : void *p =
5373 2131 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCharCount ? nCharCount : 1);
5374 2131 : if (!p)
5375 : {
5376 0 : bRet = false;
5377 0 : break;
5378 : }
5379 2131 : if (nCharCount)
5380 : {
5381 2131 : memcpy(p,
5382 2131 : static_cast<const GByte *>(src_array->buffers[i]) +
5383 : nSrcCharOffset,
5384 : nCharCount);
5385 : }
5386 2131 : out_array->buffers[i] = p;
5387 : }
5388 : else
5389 : {
5390 0 : CPLError(CE_Failure, CPLE_AppDefined,
5391 : "OGRCloneArrowArray(): unhandled case, array = %s, format "
5392 : "= '%s', i = 3",
5393 0 : schema->name, format);
5394 0 : bRet = false;
5395 0 : break;
5396 : }
5397 : }
5398 :
5399 13947 : if (bRet)
5400 : {
5401 13947 : out_array->n_children = src_array->n_children;
5402 13947 : out_array->children = static_cast<struct ArrowArray **>(
5403 13947 : CPLCalloc(static_cast<size_t>(src_array->n_children),
5404 : sizeof(struct ArrowArray *)));
5405 27653 : for (int64_t i = 0; i < src_array->n_children; ++i)
5406 : {
5407 27412 : out_array->children[i] = static_cast<struct ArrowArray *>(
5408 13706 : CPLCalloc(1, sizeof(struct ArrowArray)));
5409 39777 : if (!OGRCloneArrowArray(schema->children[i], src_array->children[i],
5410 13706 : out_array->children[i],
5411 13706 : IsFixedSizeList(format)
5412 1341 : ? nOffset * GetFixedSizeList(format)
5413 12365 : : IsStructure(format) ? nOffset
5414 : : 0))
5415 : {
5416 0 : bRet = false;
5417 0 : break;
5418 : }
5419 : }
5420 : }
5421 :
5422 13947 : if (bRet && src_array->dictionary)
5423 : {
5424 111 : out_array->dictionary = static_cast<struct ArrowArray *>(
5425 111 : CPLCalloc(1, sizeof(struct ArrowArray)));
5426 111 : bRet = OGRCloneArrowArray(schema->dictionary, src_array->dictionary,
5427 : out_array->dictionary, 0);
5428 : }
5429 :
5430 13947 : if (!bRet)
5431 : {
5432 0 : out_array->release(out_array);
5433 0 : memset(out_array, 0, sizeof(*out_array));
5434 : }
5435 13947 : return bRet;
5436 : }
5437 :
5438 : /** Full/deep copy of an array.
5439 : *
5440 : * Renormalize the offset of the array (and its children) to 0.
5441 : *
5442 : * In case of failure, out_array will be let in a released state.
5443 : *
5444 : * @param schema Schema of the array. Must *NOT* be NULL.
5445 : * @param src_array Source array. Must *NOT* be NULL.
5446 : * @param out_array Output array. Must *NOT* be NULL (but its content may be random)
5447 : * @return true if success.
5448 : */
5449 130 : bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5450 : const struct ArrowArray *src_array,
5451 : struct ArrowArray *out_array)
5452 : {
5453 130 : return OGRCloneArrowArray(schema, src_array, out_array, 0);
5454 : }
5455 :
5456 : /************************************************************************/
5457 : /* OGRCloneArrowMetadata() */
5458 : /************************************************************************/
5459 :
5460 23 : static void *OGRCloneArrowMetadata(const void *pMetadata)
5461 : {
5462 23 : if (!pMetadata)
5463 19 : return nullptr;
5464 4 : std::vector<GByte> abyOut;
5465 4 : const GByte *pabyMetadata = static_cast<const GByte *>(pMetadata);
5466 : int32_t nKVP;
5467 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + sizeof(int32_t));
5468 4 : memcpy(&nKVP, pabyMetadata, sizeof(int32_t));
5469 4 : pabyMetadata += sizeof(int32_t);
5470 8 : for (int i = 0; i < nKVP; ++i)
5471 : {
5472 : int32_t nSizeKey;
5473 0 : abyOut.insert(abyOut.end(), pabyMetadata,
5474 4 : pabyMetadata + sizeof(int32_t));
5475 4 : memcpy(&nSizeKey, pabyMetadata, sizeof(int32_t));
5476 4 : pabyMetadata += sizeof(int32_t);
5477 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeKey);
5478 4 : pabyMetadata += nSizeKey;
5479 :
5480 : int32_t nSizeValue;
5481 0 : abyOut.insert(abyOut.end(), pabyMetadata,
5482 4 : pabyMetadata + sizeof(int32_t));
5483 4 : memcpy(&nSizeValue, pabyMetadata, sizeof(int32_t));
5484 4 : pabyMetadata += sizeof(int32_t);
5485 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeValue);
5486 4 : pabyMetadata += nSizeValue;
5487 : }
5488 :
5489 4 : GByte *pabyOut = static_cast<GByte *>(VSI_MALLOC_VERBOSE(abyOut.size()));
5490 4 : if (pabyOut)
5491 4 : memcpy(pabyOut, abyOut.data(), abyOut.size());
5492 4 : return pabyOut;
5493 : }
5494 :
5495 : /************************************************************************/
5496 : /* OGRCloneArrowSchema() */
5497 : /************************************************************************/
5498 :
5499 : /** Full/deep copy of a schema.
5500 : *
5501 : * In case of failure, out_schema will be let in a released state.
5502 : *
5503 : * @param schema Schema to clone. Must *NOT* be NULL.
5504 : * @param out_schema Output schema. Must *NOT* be NULL (but its content may be random)
5505 : * @return true if success.
5506 : */
5507 23 : bool OGRCloneArrowSchema(const struct ArrowSchema *schema,
5508 : struct ArrowSchema *out_schema)
5509 : {
5510 23 : memset(out_schema, 0, sizeof(*out_schema));
5511 23 : out_schema->release = OGRLayerFullReleaseSchema;
5512 23 : out_schema->format = CPLStrdup(schema->format);
5513 23 : out_schema->name = CPLStrdup(schema->name);
5514 23 : out_schema->metadata = static_cast<const char *>(
5515 23 : const_cast<const void *>(OGRCloneArrowMetadata(schema->metadata)));
5516 23 : out_schema->flags = schema->flags;
5517 23 : if (schema->n_children)
5518 : {
5519 5 : out_schema->children =
5520 5 : static_cast<struct ArrowSchema **>(VSI_CALLOC_VERBOSE(
5521 : static_cast<int>(schema->n_children), sizeof(ArrowSchema *)));
5522 5 : if (!out_schema->children)
5523 : {
5524 0 : out_schema->release(out_schema);
5525 0 : return false;
5526 : }
5527 5 : out_schema->n_children = schema->n_children;
5528 23 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
5529 : {
5530 36 : out_schema->children[i] = static_cast<struct ArrowSchema *>(
5531 18 : CPLMalloc(sizeof(ArrowSchema)));
5532 18 : if (!OGRCloneArrowSchema(schema->children[i],
5533 18 : out_schema->children[i]))
5534 : {
5535 0 : out_schema->release(out_schema);
5536 0 : return false;
5537 : }
5538 : }
5539 : }
5540 23 : if (schema->dictionary)
5541 : {
5542 0 : out_schema->dictionary =
5543 0 : static_cast<struct ArrowSchema *>(CPLMalloc(sizeof(ArrowSchema)));
5544 0 : if (!OGRCloneArrowSchema(schema->dictionary, out_schema->dictionary))
5545 : {
5546 0 : out_schema->release(out_schema);
5547 0 : return false;
5548 : }
5549 : }
5550 23 : return true;
5551 : }
5552 :
5553 : /************************************************************************/
5554 : /* OGRLayer::IsArrowSchemaSupported() */
5555 : /************************************************************************/
5556 :
5557 : const struct
5558 : {
5559 : const char *arrowType;
5560 : OGRFieldType eType;
5561 : OGRFieldSubType eSubType;
5562 : } gasArrowTypesToOGR[] = {
5563 : {"b", OFTInteger, OFSTBoolean}, {"c", OFTInteger, OFSTInt16}, // Int8
5564 : {"C", OFTInteger, OFSTInt16}, // UInt8
5565 : {"s", OFTInteger, OFSTInt16}, // Int16
5566 : {"S", OFTInteger, OFSTNone}, // UInt16
5567 : {"i", OFTInteger, OFSTNone}, // Int32
5568 : {"I", OFTInteger64, OFSTNone}, // UInt32
5569 : {"l", OFTInteger64, OFSTNone}, // Int64
5570 : {"L", OFTReal, OFSTNone}, // UInt64 (potentially lossy conversion if going through OGRFeature)
5571 : {"e", OFTReal, OFSTFloat32}, // float16
5572 : {"f", OFTReal, OFSTFloat32}, // float32
5573 : {"g", OFTReal, OFSTNone}, // float64
5574 : {"z", OFTBinary, OFSTNone}, // binary
5575 : {"Z", OFTBinary, OFSTNone}, // large binary (will be limited to 32 bit length though if going through OGRFeature!)
5576 : {"u", OFTString, OFSTNone}, // string
5577 : {"U", OFTString, OFSTNone}, // large string
5578 : {"tdD", OFTDate, OFSTNone}, // date32[days]
5579 : {"tdm", OFTDate, OFSTNone}, // date64[milliseconds]
5580 : {"tts", OFTTime, OFSTNone}, // time32 [seconds]
5581 : {"ttm", OFTTime, OFSTNone}, // time32 [milliseconds]
5582 : {"ttu", OFTTime, OFSTNone}, // time64 [microseconds]
5583 : {"ttn", OFTTime, OFSTNone}, // time64 [nanoseconds]
5584 : };
5585 :
5586 : const struct
5587 : {
5588 : const char arrowLetter;
5589 : OGRFieldType eType;
5590 : OGRFieldSubType eSubType;
5591 : } gasListTypes[] = {
5592 : {ARROW_LETTER_BOOLEAN, OFTIntegerList, OFSTBoolean},
5593 : {ARROW_LETTER_INT8, OFTIntegerList, OFSTInt16},
5594 : {ARROW_LETTER_UINT8, OFTIntegerList, OFSTInt16},
5595 : {ARROW_LETTER_INT16, OFTIntegerList, OFSTInt16},
5596 : {ARROW_LETTER_UINT16, OFTIntegerList, OFSTNone},
5597 : {ARROW_LETTER_INT32, OFTIntegerList, OFSTNone},
5598 : {ARROW_LETTER_UINT32, OFTInteger64List, OFSTNone},
5599 : {ARROW_LETTER_INT64, OFTInteger64List, OFSTNone},
5600 : {ARROW_LETTER_UINT64, OFTRealList,
5601 : OFSTNone}, //(potentially lossy conversion if going through OGRFeature)
5602 : {ARROW_LETTER_FLOAT16, OFTRealList, OFSTFloat32},
5603 : {ARROW_LETTER_FLOAT32, OFTRealList, OFSTFloat32},
5604 : {ARROW_LETTER_FLOAT64, OFTRealList, OFSTNone},
5605 : {ARROW_LETTER_STRING, OFTStringList, OFSTNone},
5606 : {ARROW_LETTER_LARGE_STRING, OFTStringList, OFSTNone},
5607 : };
5608 :
5609 43 : static inline bool IsValidDictionaryIndexType(const char *format)
5610 : {
5611 40 : return (format[0] == ARROW_LETTER_INT8 || format[0] == ARROW_LETTER_UINT8 ||
5612 37 : format[0] == ARROW_LETTER_INT16 ||
5613 34 : format[0] == ARROW_LETTER_UINT16 ||
5614 31 : format[0] == ARROW_LETTER_INT32 ||
5615 9 : format[0] == ARROW_LETTER_UINT32 ||
5616 6 : format[0] == ARROW_LETTER_INT64 ||
5617 89 : format[0] == ARROW_LETTER_UINT64) &&
5618 86 : format[1] == 0;
5619 : }
5620 :
5621 230 : static bool IsSupportForJSONObj(const struct ArrowSchema *schema)
5622 : {
5623 230 : const char *format = schema->format;
5624 230 : if (IsStructure(format))
5625 : {
5626 35 : for (int64_t i = 0; i < schema->n_children; ++i)
5627 : {
5628 26 : if (!IsSupportForJSONObj(schema->children[i]))
5629 0 : return false;
5630 : }
5631 9 : return true;
5632 : }
5633 :
5634 2752 : for (const auto &sType : gasListTypes)
5635 : {
5636 2626 : if (format[0] == sType.arrowLetter && format[1] == 0)
5637 : {
5638 95 : return true;
5639 : }
5640 : }
5641 :
5642 126 : if (IsBinary(format) || IsLargeBinary(format) || IsFixedWidthBinary(format))
5643 12 : return true;
5644 :
5645 114 : if (IsDecimal(format))
5646 : {
5647 6 : int nPrecision = 0;
5648 6 : int nScale = 0;
5649 6 : int nWidthInBytes = 0;
5650 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
5651 : {
5652 0 : CPLError(CE_Failure, CPLE_AppDefined, "Invalid field format %s",
5653 : format);
5654 0 : return false;
5655 : }
5656 :
5657 6 : return GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision) ==
5658 6 : nullptr;
5659 : }
5660 :
5661 108 : if (IsMap(format))
5662 : {
5663 74 : return IsStructure(schema->children[0]->format) &&
5664 148 : schema->children[0]->n_children == 2 &&
5665 222 : IsString(schema->children[0]->children[0]->format) &&
5666 148 : IsSupportForJSONObj(schema->children[0]->children[1]);
5667 : }
5668 :
5669 34 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
5670 : {
5671 34 : return IsSupportForJSONObj(schema->children[0]);
5672 : }
5673 :
5674 0 : return false;
5675 : }
5676 :
5677 490 : static bool IsArrowSchemaSupportedInternal(const struct ArrowSchema *schema,
5678 : const std::string &osFieldPrefix,
5679 : std::string &osErrorMsg)
5680 : {
5681 0 : const auto AppendError = [&osErrorMsg](const std::string &osMsg)
5682 : {
5683 0 : if (!osErrorMsg.empty())
5684 0 : osErrorMsg += " ";
5685 0 : osErrorMsg += osMsg;
5686 490 : };
5687 :
5688 490 : const char *fieldName = schema->name;
5689 490 : const char *format = schema->format;
5690 490 : if (IsStructure(format))
5691 : {
5692 5 : bool bRet = true;
5693 5 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
5694 21 : for (int64_t i = 0; i < schema->n_children; ++i)
5695 : {
5696 16 : if (!IsArrowSchemaSupportedInternal(schema->children[i],
5697 : osNewPrefix, osErrorMsg))
5698 0 : bRet = false;
5699 : }
5700 5 : return bRet;
5701 : }
5702 :
5703 485 : if (schema->dictionary)
5704 : {
5705 15 : if (!IsValidDictionaryIndexType(format))
5706 : {
5707 0 : AppendError("Dictionary only supported if the parent is of "
5708 : "type [U]Int[8|16|32|64]");
5709 0 : return false;
5710 : }
5711 :
5712 15 : schema = schema->dictionary;
5713 15 : format = schema->format;
5714 : }
5715 :
5716 485 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
5717 : {
5718 : // Only some subtypes supported
5719 132 : const char *childFormat = schema->children[0]->format;
5720 1103 : for (const auto &sType : gasListTypes)
5721 : {
5722 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
5723 : {
5724 117 : return true;
5725 : }
5726 : }
5727 :
5728 15 : if (IsDecimal(childFormat))
5729 : {
5730 7 : int nPrecision = 0;
5731 7 : int nScale = 0;
5732 7 : int nWidthInBytes = 0;
5733 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
5734 : nWidthInBytes))
5735 : {
5736 0 : AppendError(std::string("Invalid field format ") + childFormat +
5737 0 : " for field " + osFieldPrefix + fieldName);
5738 0 : return false;
5739 : }
5740 :
5741 : const char *pszError =
5742 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
5743 7 : if (pszError)
5744 : {
5745 0 : AppendError(pszError);
5746 0 : return false;
5747 : }
5748 :
5749 7 : return true;
5750 : }
5751 :
5752 8 : if (IsSupportForJSONObj(schema))
5753 : {
5754 8 : return true;
5755 : }
5756 :
5757 0 : AppendError("Type list for field " + osFieldPrefix + fieldName +
5758 : " is not supported.");
5759 0 : return false;
5760 : }
5761 :
5762 353 : else if (IsMap(format))
5763 : {
5764 70 : if (IsSupportForJSONObj(schema))
5765 70 : return true;
5766 :
5767 0 : AppendError("Type map for field " + osFieldPrefix + fieldName +
5768 : " is not supported.");
5769 0 : return false;
5770 : }
5771 283 : else if (IsDecimal(format))
5772 : {
5773 6 : int nPrecision = 0;
5774 6 : int nScale = 0;
5775 6 : int nWidthInBytes = 0;
5776 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
5777 : {
5778 0 : AppendError(std::string("Invalid field format ") + format +
5779 0 : " for field " + osFieldPrefix + fieldName);
5780 0 : return false;
5781 : }
5782 :
5783 : const char *pszError =
5784 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
5785 6 : if (pszError)
5786 : {
5787 0 : AppendError(pszError);
5788 0 : return false;
5789 : }
5790 :
5791 6 : return true;
5792 : }
5793 : else
5794 : {
5795 3688 : for (const auto &sType : gasArrowTypesToOGR)
5796 : {
5797 3668 : if (strcmp(format, sType.arrowType) == 0)
5798 : {
5799 257 : return true;
5800 : }
5801 : }
5802 :
5803 20 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
5804 20 : return true;
5805 :
5806 0 : AppendError("Type '" + std::string(format) + "' for field " +
5807 0 : osFieldPrefix + fieldName + " is not supported.");
5808 0 : return false;
5809 : }
5810 : }
5811 :
5812 : /** Returns whether the provided ArrowSchema is supported for writing.
5813 : *
5814 : * This method exists since not all drivers may support all Arrow data types.
5815 : *
5816 : * The ArrowSchema must be of type struct (format=+s)
5817 : *
5818 : * It is recommended to call this method before calling WriteArrowBatch().
5819 : *
5820 : * This is the same as the C function OGR_L_IsArrowSchemaSupported().
5821 : *
5822 : * @param schema Schema of type struct (format = '+s')
5823 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
5824 : * @param[out] osErrorMsg Reason of the failure, when this method returns false.
5825 : * @return true if the ArrowSchema is supported for writing.
5826 : * @since 3.8
5827 : */
5828 37 : bool OGRLayer::IsArrowSchemaSupported(const struct ArrowSchema *schema,
5829 : CPL_UNUSED CSLConstList papszOptions,
5830 : std::string &osErrorMsg) const
5831 : {
5832 37 : if (!IsStructure(schema->format))
5833 : {
5834 : osErrorMsg =
5835 : "IsArrowSchemaSupported() should be called on a schema that is a "
5836 1 : "struct of fields";
5837 1 : return false;
5838 : }
5839 :
5840 36 : bool bRet = true;
5841 510 : for (int64_t i = 0; i < schema->n_children; ++i)
5842 : {
5843 474 : if (!IsArrowSchemaSupportedInternal(schema->children[i], std::string(),
5844 : osErrorMsg))
5845 0 : bRet = false;
5846 : }
5847 36 : return bRet;
5848 : }
5849 :
5850 : /************************************************************************/
5851 : /* OGR_L_IsArrowSchemaSupported() */
5852 : /************************************************************************/
5853 :
5854 : /** Returns whether the provided ArrowSchema is supported for writing.
5855 : *
5856 : * This function exists since not all drivers may support all Arrow data types.
5857 : *
5858 : * The ArrowSchema must be of type struct (format=+s)
5859 : *
5860 : * It is recommended to call this function before calling OGR_L_WriteArrowBatch().
5861 : *
5862 : * This is the same as the C++ method OGRLayer::IsArrowSchemaSupported().
5863 : *
5864 : * @param hLayer Layer.
5865 : * @param schema Schema of type struct (format = '+s')
5866 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
5867 : * @param[out] ppszErrorMsg nullptr, or pointer to a string that will contain
5868 : * the reason of the failure, when this function returns false.
5869 : * @return true if the ArrowSchema is supported for writing.
5870 : * @since 3.8
5871 : */
5872 19 : bool OGR_L_IsArrowSchemaSupported(OGRLayerH hLayer,
5873 : const struct ArrowSchema *schema,
5874 : char **papszOptions, char **ppszErrorMsg)
5875 : {
5876 19 : VALIDATE_POINTER1(hLayer, __func__, false);
5877 19 : VALIDATE_POINTER1(schema, __func__, false);
5878 :
5879 38 : std::string osErrorMsg;
5880 38 : if (!OGRLayer::FromHandle(hLayer)->IsArrowSchemaSupported(
5881 19 : schema, papszOptions, osErrorMsg))
5882 : {
5883 4 : if (ppszErrorMsg)
5884 4 : *ppszErrorMsg = VSIStrdup(osErrorMsg.c_str());
5885 4 : return false;
5886 : }
5887 : else
5888 : {
5889 15 : if (ppszErrorMsg)
5890 15 : *ppszErrorMsg = nullptr;
5891 15 : return true;
5892 : }
5893 : }
5894 :
5895 : /************************************************************************/
5896 : /* IsKnownCodedFieldDomain() */
5897 : /************************************************************************/
5898 :
5899 34 : static bool IsKnownCodedFieldDomain(OGRLayer *poLayer,
5900 : const char *arrowMetadata)
5901 : {
5902 34 : if (arrowMetadata)
5903 : {
5904 6 : const auto oMetadata = OGRParseArrowMetadata(arrowMetadata);
5905 6 : for (const auto &oIter : oMetadata)
5906 : {
5907 6 : if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
5908 : {
5909 6 : auto poDS = poLayer->GetDataset();
5910 6 : if (poDS)
5911 : {
5912 : const auto poFieldDomain =
5913 6 : poDS->GetFieldDomain(oIter.second);
5914 12 : if (poFieldDomain &&
5915 6 : poFieldDomain->GetDomainType() == OFDT_CODED)
5916 : {
5917 6 : return true;
5918 : }
5919 : }
5920 : }
5921 : }
5922 : }
5923 28 : return false;
5924 : }
5925 :
5926 : /************************************************************************/
5927 : /* OGRLayer::CreateFieldFromArrowSchema() */
5928 : /************************************************************************/
5929 :
5930 : //! @cond Doxygen_Suppress
5931 447 : bool OGRLayer::CreateFieldFromArrowSchemaInternal(
5932 : const struct ArrowSchema *schema, const std::string &osFieldPrefix,
5933 : CSLConstList papszOptions)
5934 : {
5935 447 : const char *fieldName = schema->name;
5936 447 : const char *format = schema->format;
5937 447 : if (IsStructure(format))
5938 : {
5939 10 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
5940 21 : for (int64_t i = 0; i < schema->n_children; ++i)
5941 : {
5942 16 : if (!CreateFieldFromArrowSchemaInternal(schema->children[i],
5943 : osNewPrefix, papszOptions))
5944 0 : return false;
5945 : }
5946 5 : return true;
5947 : }
5948 :
5949 884 : CPLStringList aosNativeTypes;
5950 442 : auto poLayer = const_cast<OGRLayer *>(this);
5951 442 : auto poDS = poLayer->GetDataset();
5952 442 : if (poDS)
5953 : {
5954 442 : auto poDriver = poDS->GetDriver();
5955 442 : if (poDriver)
5956 : {
5957 : const char *pszMetadataItem =
5958 442 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
5959 442 : if (pszMetadataItem)
5960 442 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
5961 : }
5962 : }
5963 :
5964 457 : if (schema->dictionary &&
5965 15 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
5966 : {
5967 13 : if (!IsValidDictionaryIndexType(format))
5968 : {
5969 0 : CPLError(CE_Failure, CPLE_NotSupported,
5970 : "Dictionary only supported if the parent is of "
5971 : "type [U]Int[8|16|32|64]");
5972 0 : return false;
5973 : }
5974 :
5975 13 : schema = schema->dictionary;
5976 13 : format = schema->format;
5977 : }
5978 :
5979 442 : const auto AddField = [this, schema, fieldName, &aosNativeTypes,
5980 : &osFieldPrefix, poDS](OGRFieldType eTypeIn,
5981 : OGRFieldSubType eSubTypeIn,
5982 3124 : int nWidth, int nPrecision)
5983 : {
5984 442 : const char *pszTypeName = OGRFieldDefn::GetFieldTypeName(eTypeIn);
5985 442 : auto eTypeOut = eTypeIn;
5986 442 : auto eSubTypeOut = eSubTypeIn;
5987 884 : if (!aosNativeTypes.empty() &&
5988 442 : aosNativeTypes.FindString(pszTypeName) < 0)
5989 : {
5990 20 : eTypeOut = OFTString;
5991 20 : eSubTypeOut =
5992 15 : (eTypeIn == OFTIntegerList || eTypeIn == OFTInteger64List ||
5993 8 : eTypeIn == OFTRealList || eTypeIn == OFTStringList)
5994 35 : ? OFSTJSON
5995 : : OFSTNone;
5996 : }
5997 :
5998 884 : const std::string osWantedOGRFieldName = osFieldPrefix + fieldName;
5999 884 : OGRFieldDefn oFieldDefn(osWantedOGRFieldName.c_str(), eTypeOut);
6000 442 : oFieldDefn.SetSubType(eSubTypeOut);
6001 442 : if (eTypeOut == eTypeIn && eSubTypeOut == eSubTypeIn)
6002 : {
6003 422 : oFieldDefn.SetWidth(nWidth);
6004 422 : oFieldDefn.SetPrecision(nPrecision);
6005 : }
6006 442 : oFieldDefn.SetNullable((schema->flags & ARROW_FLAG_NULLABLE) != 0);
6007 :
6008 442 : if (schema->metadata)
6009 : {
6010 50 : const auto oMetadata = OGRParseArrowMetadata(schema->metadata);
6011 51 : for (const auto &oIter : oMetadata)
6012 : {
6013 26 : if (oIter.first == MD_GDAL_OGR_ALTERNATIVE_NAME)
6014 2 : oFieldDefn.SetAlternativeName(oIter.second.c_str());
6015 24 : else if (oIter.first == MD_GDAL_OGR_COMMENT)
6016 2 : oFieldDefn.SetComment(oIter.second);
6017 22 : else if (oIter.first == MD_GDAL_OGR_DEFAULT)
6018 2 : oFieldDefn.SetDefault(oIter.second.c_str());
6019 20 : else if (oIter.first == MD_GDAL_OGR_SUBTYPE)
6020 : {
6021 5 : if (eTypeIn == eTypeOut)
6022 : {
6023 4 : const auto &osSubType = oIter.second;
6024 4 : for (auto eSubType = OFSTNone;
6025 15 : eSubType <= OFSTMaxSubType;)
6026 : {
6027 15 : if (OGRFieldDefn::GetFieldSubTypeName(eSubType) ==
6028 : osSubType)
6029 : {
6030 4 : oFieldDefn.SetSubType(eSubType);
6031 4 : break;
6032 : }
6033 11 : if (eSubType == OFSTMaxSubType)
6034 0 : break;
6035 : else
6036 11 : eSubType =
6037 11 : static_cast<OGRFieldSubType>(eSubType + 1);
6038 : }
6039 : }
6040 : }
6041 15 : else if (oIter.first == MD_GDAL_OGR_WIDTH)
6042 4 : oFieldDefn.SetWidth(atoi(oIter.second.c_str()));
6043 11 : else if (oIter.first == MD_GDAL_OGR_UNIQUE)
6044 2 : oFieldDefn.SetUnique(oIter.second == "true");
6045 9 : else if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6046 : {
6047 2 : if (poDS && poDS->GetFieldDomain(oIter.second))
6048 2 : oFieldDefn.SetDomainName(oIter.second);
6049 : }
6050 13 : else if (oIter.first == ARROW_EXTENSION_NAME_KEY &&
6051 6 : oIter.second == EXTENSION_NAME_ARROW_JSON)
6052 : {
6053 6 : oFieldDefn.SetSubType(OFSTJSON);
6054 : }
6055 : else
6056 : {
6057 1 : CPLDebug("OGR", "Unknown field metadata: %s",
6058 : oIter.first.c_str());
6059 : }
6060 : }
6061 : }
6062 442 : auto poLayerDefn = GetLayerDefn();
6063 442 : const int nFieldCountBefore = poLayerDefn->GetFieldCount();
6064 884 : if (CreateField(&oFieldDefn) != OGRERR_NONE ||
6065 442 : nFieldCountBefore + 1 != poLayerDefn->GetFieldCount())
6066 : {
6067 0 : return false;
6068 : }
6069 : const char *pszActualFieldName =
6070 442 : poLayerDefn->GetFieldDefn(nFieldCountBefore)->GetNameRef();
6071 442 : if (pszActualFieldName != osWantedOGRFieldName)
6072 : {
6073 : m_poPrivate
6074 1 : ->m_oMapArrowFieldNameToOGRFieldName[osWantedOGRFieldName] =
6075 1 : pszActualFieldName;
6076 : }
6077 442 : return true;
6078 442 : };
6079 :
6080 7947 : for (const auto &sType : gasArrowTypesToOGR)
6081 : {
6082 7716 : if (strcmp(format, sType.arrowType) == 0)
6083 : {
6084 211 : return AddField(sType.eType, sType.eSubType, 0, 0);
6085 : }
6086 : }
6087 :
6088 231 : if (IsMap(format))
6089 : {
6090 70 : return AddField(OFTString, OFSTJSON, 0, 0);
6091 : }
6092 :
6093 161 : if (IsTimestamp(format))
6094 : {
6095 20 : return AddField(OFTDateTime, OFSTNone, 0, 0);
6096 : }
6097 :
6098 141 : if (IsFixedWidthBinary(format))
6099 : {
6100 3 : return AddField(OFTBinary, OFSTNone, GetFixedWithBinary(format), 0);
6101 : }
6102 :
6103 138 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6104 : {
6105 132 : const char *childFormat = schema->children[0]->format;
6106 1103 : for (const auto &sType : gasListTypes)
6107 : {
6108 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
6109 : {
6110 117 : return AddField(sType.eType, sType.eSubType, 0, 0);
6111 : }
6112 : }
6113 :
6114 15 : if (IsDecimal(childFormat))
6115 : {
6116 7 : int nPrecision = 0;
6117 7 : int nScale = 0;
6118 7 : int nWidthInBytes = 0;
6119 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
6120 : nWidthInBytes))
6121 : {
6122 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6123 0 : (std::string("Invalid field format ") + format +
6124 0 : " for field " + osFieldPrefix + fieldName)
6125 : .c_str());
6126 0 : return false;
6127 : }
6128 :
6129 : const char *pszError =
6130 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6131 7 : if (pszError)
6132 : {
6133 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6134 0 : return false;
6135 : }
6136 :
6137 : // DBF convention: add space for negative sign and decimal separator
6138 7 : return AddField(OFTRealList, OFSTNone, nPrecision + 2, nScale);
6139 : }
6140 :
6141 8 : if (IsSupportForJSONObj(schema->children[0]))
6142 : {
6143 8 : return AddField(OFTString, OFSTJSON, 0, 0);
6144 : }
6145 :
6146 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6147 0 : ("List of type '" + std::string(childFormat) + "' for field " +
6148 0 : osFieldPrefix + fieldName + " is not supported.")
6149 : .c_str());
6150 0 : return false;
6151 : }
6152 :
6153 6 : if (IsDecimal(format))
6154 : {
6155 6 : int nPrecision = 0;
6156 6 : int nScale = 0;
6157 6 : int nWidthInBytes = 0;
6158 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6159 : {
6160 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6161 0 : (std::string("Invalid field format ") + format +
6162 0 : " for field " + osFieldPrefix + fieldName)
6163 : .c_str());
6164 0 : return false;
6165 : }
6166 :
6167 : const char *pszError =
6168 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6169 6 : if (pszError)
6170 : {
6171 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6172 0 : return false;
6173 : }
6174 :
6175 : // DBF convention: add space for negative sign and decimal separator
6176 6 : return AddField(OFTReal, OFSTNone, nPrecision + 2, nScale);
6177 : }
6178 :
6179 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6180 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
6181 0 : fieldName + " is not supported.")
6182 : .c_str());
6183 0 : return false;
6184 : }
6185 :
6186 : //! @endcond
6187 :
6188 : /** Creates a field from an ArrowSchema.
6189 : *
6190 : * This should only be used for attribute fields. Geometry fields should
6191 : * be created with CreateGeomField(). The FID field should also not be
6192 : * passed with this method.
6193 : *
6194 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6195 : * passed schema must be for an individual field, and thus, is *not* of type
6196 : * struct (format=+s) (unless writing a set of fields grouped together in the
6197 : * same structure).
6198 : *
6199 : * Additional field metadata can be specified through the ArrowSchema::metadata
6200 : * field with the potential following items:
6201 : * <ul>
6202 : * <li>"GDAL:OGR:alternative_name": value of
6203 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6204 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6205 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6206 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6207 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6208 : * string)</li>
6209 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6210 : * "true" or "false")</li>
6211 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6212 : * </ul>
6213 : *
6214 : * This method and CreateField() are mutually exclusive in the same session.
6215 : *
6216 : * This method is the same as the C function OGR_L_CreateFieldFromArrowSchema().
6217 : *
6218 : * @param schema Schema of the field to create.
6219 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6220 : * @return true in case of success
6221 : * @since 3.8
6222 : */
6223 431 : bool OGRLayer::CreateFieldFromArrowSchema(const struct ArrowSchema *schema,
6224 : CSLConstList papszOptions)
6225 : {
6226 862 : return CreateFieldFromArrowSchemaInternal(schema, std::string(),
6227 862 : papszOptions);
6228 : }
6229 :
6230 : /************************************************************************/
6231 : /* OGR_L_CreateFieldFromArrowSchema() */
6232 : /************************************************************************/
6233 :
6234 : /** Creates a field from an ArrowSchema.
6235 : *
6236 : * This should only be used for attribute fields. Geometry fields should
6237 : * be created with CreateGeomField(). The FID field should also not be
6238 : * passed with this method.
6239 : *
6240 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6241 : * passed schema must be for an individual field, and thus, is *not* of type
6242 : * struct (format=+s) (unless writing a set of fields grouped together in the
6243 : * same structure).
6244 : *
6245 : * Additional field metadata can be specified through the ArrowSchema::metadata
6246 : * field with the potential following items:
6247 : * <ul>
6248 : * <li>"GDAL:OGR:alternative_name": value of
6249 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6250 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6251 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6252 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6253 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6254 : * string)</li>
6255 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6256 : * "true" or "false")</li>
6257 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6258 : * </ul>
6259 : *
6260 : * This method and CreateField() are mutually exclusive in the same session.
6261 : *
6262 : * This method is the same as the C++ method OGRLayer::CreateFieldFromArrowSchema().
6263 : *
6264 : * @param hLayer Layer.
6265 : * @param schema Schema of the field to create.
6266 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6267 : * @return true in case of success
6268 : * @since 3.8
6269 : */
6270 539 : bool OGR_L_CreateFieldFromArrowSchema(OGRLayerH hLayer,
6271 : const struct ArrowSchema *schema,
6272 : char **papszOptions)
6273 : {
6274 539 : VALIDATE_POINTER1(hLayer, __func__, false);
6275 539 : VALIDATE_POINTER1(schema, __func__, false);
6276 :
6277 1078 : return OGRLayer::FromHandle(hLayer)->CreateFieldFromArrowSchema(
6278 539 : schema, papszOptions);
6279 : }
6280 :
6281 : /************************************************************************/
6282 : /* BuildOGRFieldInfo() */
6283 : /************************************************************************/
6284 :
6285 : constexpr int FID_COLUMN_SPECIAL_OGR_FIELD_IDX = -2;
6286 :
6287 : struct FieldInfo
6288 : {
6289 : std::string osName{};
6290 : int iOGRFieldIdx = -1;
6291 : const char *format = nullptr;
6292 : OGRFieldType eNominalFieldType =
6293 : OFTMaxType; // OGR data type that would best match the Arrow type
6294 : OGRFieldType eTargetFieldType =
6295 : OFTMaxType; // actual OGR data type of the layer field
6296 : // OGR data type of the feature passed to FillFeature()
6297 : OGRFieldType eSetFeatureFieldType = OFTMaxType;
6298 : bool bIsGeomCol = false;
6299 : bool bUseDictionary = false;
6300 : bool bUseStringOptim = false;
6301 : int nWidthInBytes = 0; // only used for decimal fields
6302 : int nPrecision = 0; // only used for decimal fields
6303 : int nScale = 0; // only used for decimal fields
6304 : };
6305 :
6306 749 : static bool BuildOGRFieldInfo(
6307 : const struct ArrowSchema *schema, struct ArrowArray *array,
6308 : const OGRFeatureDefn *poFeatureDefn, const std::string &osFieldPrefix,
6309 : const CPLStringList &aosNativeTypes, bool &bFallbackTypesUsed,
6310 : std::vector<FieldInfo> &asFieldInfo, const char *pszFIDName,
6311 : const char *pszGeomFieldName, OGRLayer *poLayer,
6312 : const std::map<std::string, std::string> &oMapArrowFieldNameToOGRFieldName,
6313 : const struct ArrowSchema *&schemaFIDColumn,
6314 : struct ArrowArray *&arrayFIDColumn)
6315 : {
6316 749 : const char *fieldName = schema->name;
6317 749 : const char *format = schema->format;
6318 749 : if (IsStructure(format))
6319 : {
6320 18 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6321 39 : for (int64_t i = 0; i < array->n_children; ++i)
6322 : {
6323 30 : if (!BuildOGRFieldInfo(schema->children[i], array->children[i],
6324 : poFeatureDefn, osNewPrefix, aosNativeTypes,
6325 : bFallbackTypesUsed, asFieldInfo, pszFIDName,
6326 : pszGeomFieldName, poLayer,
6327 : oMapArrowFieldNameToOGRFieldName,
6328 : schemaFIDColumn, arrayFIDColumn))
6329 : {
6330 0 : return false;
6331 : }
6332 : }
6333 9 : return true;
6334 : }
6335 :
6336 1480 : FieldInfo sInfo;
6337 :
6338 759 : if (schema->dictionary &&
6339 19 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6340 : {
6341 15 : if (!IsValidDictionaryIndexType(format))
6342 : {
6343 0 : CPLError(CE_Failure, CPLE_NotSupported,
6344 : "Dictionary only supported if the parent is of "
6345 : "type [U]Int[8|16|32|64]");
6346 0 : return false;
6347 : }
6348 :
6349 15 : sInfo.bUseDictionary = true;
6350 15 : schema = schema->dictionary;
6351 15 : format = schema->format;
6352 15 : array = array->dictionary;
6353 : }
6354 :
6355 740 : sInfo.osName = osFieldPrefix + fieldName;
6356 740 : sInfo.format = format;
6357 740 : if (pszFIDName && sInfo.osName == pszFIDName)
6358 : {
6359 27 : if (IsInt32(format) || IsInt64(format))
6360 : {
6361 26 : sInfo.iOGRFieldIdx = FID_COLUMN_SPECIAL_OGR_FIELD_IDX;
6362 26 : schemaFIDColumn = schema;
6363 26 : arrayFIDColumn = array;
6364 : }
6365 : else
6366 : {
6367 1 : CPLError(CE_Failure, CPLE_AppDefined,
6368 : "FID column '%s' should be of Arrow format 'i' "
6369 : "(int32) or 'l' (int64)",
6370 : sInfo.osName.c_str());
6371 1 : return false;
6372 : }
6373 : }
6374 : else
6375 : {
6376 : const std::string &osExpectedOGRFieldName =
6377 2138 : [&oMapArrowFieldNameToOGRFieldName, &sInfo]() -> const std::string &
6378 : {
6379 : const auto oIter =
6380 713 : oMapArrowFieldNameToOGRFieldName.find(sInfo.osName);
6381 713 : if (oIter != oMapArrowFieldNameToOGRFieldName.end())
6382 1 : return oIter->second;
6383 712 : return sInfo.osName;
6384 713 : }();
6385 713 : sInfo.iOGRFieldIdx =
6386 713 : poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str());
6387 713 : if (sInfo.iOGRFieldIdx >= 0)
6388 : {
6389 643 : bool bTypeOK = false;
6390 : const auto eOGRType =
6391 643 : poFeatureDefn->GetFieldDefn(sInfo.iOGRFieldIdx)->GetType();
6392 643 : sInfo.eTargetFieldType = eOGRType;
6393 11561 : for (const auto &sType : gasArrowTypesToOGR)
6394 : {
6395 11218 : if (strcmp(format, sType.arrowType) == 0)
6396 : {
6397 300 : sInfo.bUseStringOptim = sType.eType == OFTString;
6398 300 : sInfo.eNominalFieldType = sType.eType;
6399 300 : if (eOGRType == sInfo.eNominalFieldType)
6400 : {
6401 272 : bTypeOK = true;
6402 272 : break;
6403 : }
6404 28 : else if (eOGRType == OFTString)
6405 : {
6406 4 : bFallbackTypesUsed = true;
6407 4 : bTypeOK = true;
6408 4 : break;
6409 : }
6410 24 : else if (eOGRType == OFTInteger &&
6411 10 : sType.eType == OFTInteger64)
6412 : {
6413 : // Potentially lossy.
6414 4 : CPLDebug("OGR",
6415 : "For field %s, writing from Arrow array of "
6416 : "type Int64 into OGR Int32 field. "
6417 : "Potentially loss conversion can happen",
6418 : sInfo.osName.c_str());
6419 4 : bFallbackTypesUsed = true;
6420 4 : bTypeOK = true;
6421 4 : break;
6422 : }
6423 20 : else if (eOGRType == OFTInteger && sType.eType == OFTReal)
6424 : {
6425 : // Potentially lossy.
6426 6 : CPLDebug("OGR",
6427 : "For field %s, writing from Arrow array of "
6428 : "type Real into OGR Int32 field. "
6429 : "Potentially loss conversion can happen",
6430 : sInfo.osName.c_str());
6431 6 : bFallbackTypesUsed = true;
6432 6 : bTypeOK = true;
6433 6 : break;
6434 : }
6435 14 : else if (eOGRType == OFTInteger64 && sType.eType == OFTReal)
6436 : {
6437 : // Potentially lossy.
6438 6 : CPLDebug("OGR",
6439 : "For field %s, writing from Arrow array of "
6440 : "type Real into OGR Int64 field. "
6441 : "Potentially loss conversion can happen",
6442 : sInfo.osName.c_str());
6443 6 : bFallbackTypesUsed = true;
6444 6 : bTypeOK = true;
6445 6 : break;
6446 : }
6447 8 : else if (eOGRType == OFTReal && sType.eType == OFTInteger64)
6448 : {
6449 : // Potentially lossy.
6450 4 : CPLDebug("OGR",
6451 : "For field %s, writing from Arrow array of "
6452 : "type Int64 into OGR Real field. "
6453 : "Potentially loss conversion can happen",
6454 : sInfo.osName.c_str());
6455 4 : bFallbackTypesUsed = true;
6456 4 : bTypeOK = true;
6457 4 : break;
6458 : }
6459 4 : else if ((eOGRType == OFTInteger64 ||
6460 4 : eOGRType == OFTReal) &&
6461 4 : sType.eType == OFTInteger)
6462 : {
6463 : // Non-lossy
6464 4 : bFallbackTypesUsed = true;
6465 4 : bTypeOK = true;
6466 4 : break;
6467 : }
6468 : else
6469 : {
6470 0 : CPLError(CE_Failure, CPLE_AppDefined,
6471 : "For field %s, OGR field type is %s whereas "
6472 : "Arrow type implies %s",
6473 : sInfo.osName.c_str(),
6474 : OGR_GetFieldTypeName(eOGRType),
6475 0 : OGR_GetFieldTypeName(sType.eType));
6476 0 : return false;
6477 : }
6478 : }
6479 : }
6480 :
6481 643 : if (!bTypeOK && IsMap(format))
6482 : {
6483 106 : sInfo.eNominalFieldType = OFTString;
6484 106 : if (eOGRType == sInfo.eNominalFieldType)
6485 : {
6486 106 : bTypeOK = true;
6487 : }
6488 : else
6489 : {
6490 0 : CPLError(CE_Failure, CPLE_AppDefined,
6491 : "For field %s, OGR field type is %s whereas "
6492 : "Arrow type implies %s",
6493 : sInfo.osName.c_str(),
6494 : OGR_GetFieldTypeName(eOGRType),
6495 : OGR_GetFieldTypeName(OFTString));
6496 0 : return false;
6497 : }
6498 : }
6499 :
6500 643 : if (!bTypeOK && IsTimestamp(format))
6501 : {
6502 32 : sInfo.eNominalFieldType = OFTDateTime;
6503 32 : if (eOGRType == sInfo.eNominalFieldType)
6504 : {
6505 31 : bTypeOK = true;
6506 : }
6507 1 : else if (eOGRType == OFTString)
6508 : {
6509 1 : bFallbackTypesUsed = true;
6510 1 : bTypeOK = true;
6511 : }
6512 : else
6513 : {
6514 0 : CPLError(CE_Failure, CPLE_AppDefined,
6515 : "For field %s, OGR field type is %s whereas "
6516 : "Arrow type implies %s",
6517 : sInfo.osName.c_str(),
6518 : OGR_GetFieldTypeName(eOGRType),
6519 : OGR_GetFieldTypeName(OFTDateTime));
6520 0 : return false;
6521 : }
6522 : }
6523 :
6524 643 : if (!bTypeOK && IsFixedWidthBinary(format))
6525 : {
6526 5 : sInfo.eNominalFieldType = OFTBinary;
6527 5 : if (eOGRType == sInfo.eNominalFieldType)
6528 : {
6529 5 : bTypeOK = true;
6530 : }
6531 0 : else if (eOGRType == OFTString)
6532 : {
6533 0 : bFallbackTypesUsed = true;
6534 0 : bTypeOK = true;
6535 : }
6536 : else
6537 : {
6538 0 : CPLError(CE_Failure, CPLE_AppDefined,
6539 : "For field %s, OGR field type is %s whereas "
6540 : "Arrow type implies %s",
6541 : sInfo.osName.c_str(),
6542 : OGR_GetFieldTypeName(eOGRType),
6543 : OGR_GetFieldTypeName(OFTBinary));
6544 0 : return false;
6545 : }
6546 : }
6547 :
6548 716 : if (!bTypeOK && (IsList(format) || IsLargeList(format) ||
6549 73 : IsFixedSizeList(format)))
6550 : {
6551 190 : const char *childFormat = schema->children[0]->format;
6552 1565 : for (const auto &sType : gasListTypes)
6553 : {
6554 1544 : if (childFormat[0] == sType.arrowLetter &&
6555 169 : childFormat[1] == 0)
6556 : {
6557 169 : sInfo.eNominalFieldType = sType.eType;
6558 169 : if (eOGRType == sInfo.eNominalFieldType)
6559 : {
6560 154 : bTypeOK = true;
6561 154 : break;
6562 : }
6563 15 : else if (eOGRType == OFTString)
6564 : {
6565 15 : bFallbackTypesUsed = true;
6566 15 : bTypeOK = true;
6567 15 : break;
6568 : }
6569 : else
6570 : {
6571 0 : CPLError(CE_Failure, CPLE_AppDefined,
6572 : "For field %s, OGR field type is %s "
6573 : "whereas "
6574 : "Arrow type implies %s",
6575 : sInfo.osName.c_str(),
6576 : OGR_GetFieldTypeName(eOGRType),
6577 0 : OGR_GetFieldTypeName(sType.eType));
6578 0 : return false;
6579 : }
6580 : }
6581 : }
6582 :
6583 190 : if (!bTypeOK && IsDecimal(childFormat))
6584 : {
6585 11 : if (!ParseDecimalFormat(childFormat, sInfo.nPrecision,
6586 : sInfo.nScale, sInfo.nWidthInBytes))
6587 : {
6588 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6589 0 : (std::string("Invalid field format ") +
6590 0 : childFormat + " for field " + osFieldPrefix +
6591 : fieldName)
6592 : .c_str());
6593 0 : return false;
6594 : }
6595 :
6596 11 : const char *pszError = GetErrorIfUnsupportedDecimal(
6597 : sInfo.nWidthInBytes, sInfo.nPrecision);
6598 11 : if (pszError)
6599 : {
6600 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6601 0 : return false;
6602 : }
6603 :
6604 11 : sInfo.eNominalFieldType = OFTRealList;
6605 11 : if (eOGRType == sInfo.eNominalFieldType)
6606 : {
6607 11 : bTypeOK = true;
6608 : }
6609 0 : else if (eOGRType == OFTString)
6610 : {
6611 0 : bFallbackTypesUsed = true;
6612 0 : bTypeOK = true;
6613 : }
6614 : else
6615 : {
6616 0 : CPLError(CE_Failure, CPLE_AppDefined,
6617 : "For field %s, OGR field type is %s whereas "
6618 : "Arrow type implies %s",
6619 : sInfo.osName.c_str(),
6620 : OGR_GetFieldTypeName(eOGRType),
6621 : OGR_GetFieldTypeName(OFTRealList));
6622 0 : return false;
6623 : }
6624 : }
6625 :
6626 190 : if (!bTypeOK && IsSupportForJSONObj(schema->children[0]))
6627 : {
6628 10 : sInfo.eNominalFieldType = OFTString;
6629 10 : if (eOGRType == sInfo.eNominalFieldType)
6630 : {
6631 10 : bTypeOK = true;
6632 : }
6633 : else
6634 : {
6635 0 : CPLError(CE_Failure, CPLE_AppDefined,
6636 : "For field %s, OGR field type is %s whereas "
6637 : "Arrow type implies %s",
6638 : sInfo.osName.c_str(),
6639 : OGR_GetFieldTypeName(eOGRType),
6640 : OGR_GetFieldTypeName(OFTString));
6641 0 : return false;
6642 : }
6643 : }
6644 :
6645 190 : if (!bTypeOK)
6646 : {
6647 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6648 0 : ("List of type '" + std::string(childFormat) +
6649 0 : "' for field " + osFieldPrefix + fieldName +
6650 : " is not supported.")
6651 : .c_str());
6652 0 : return false;
6653 : }
6654 : }
6655 :
6656 643 : if (!bTypeOK && IsDecimal(format))
6657 : {
6658 10 : if (!ParseDecimalFormat(format, sInfo.nPrecision, sInfo.nScale,
6659 : sInfo.nWidthInBytes))
6660 : {
6661 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6662 0 : (std::string("Invalid field format ") + format +
6663 0 : " for field " + osFieldPrefix + fieldName)
6664 : .c_str());
6665 0 : return false;
6666 : }
6667 :
6668 10 : const char *pszError = GetErrorIfUnsupportedDecimal(
6669 : sInfo.nWidthInBytes, sInfo.nPrecision);
6670 10 : if (pszError)
6671 : {
6672 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6673 0 : return false;
6674 : }
6675 :
6676 10 : sInfo.eNominalFieldType = OFTReal;
6677 10 : if (eOGRType == sInfo.eNominalFieldType)
6678 : {
6679 10 : bTypeOK = true;
6680 : }
6681 0 : else if (eOGRType == OFTString)
6682 : {
6683 0 : bFallbackTypesUsed = true;
6684 0 : bTypeOK = true;
6685 : }
6686 : else
6687 : {
6688 0 : CPLError(CE_Failure, CPLE_AppDefined,
6689 : "For field %s, OGR field type is %s whereas "
6690 : "Arrow type implies %s",
6691 : sInfo.osName.c_str(),
6692 : OGR_GetFieldTypeName(eOGRType),
6693 : OGR_GetFieldTypeName(OFTReal));
6694 0 : return false;
6695 : }
6696 : }
6697 :
6698 643 : if (!bTypeOK)
6699 : {
6700 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6701 0 : ("Type '" + std::string(format) + "' for field " +
6702 0 : osFieldPrefix + fieldName + " is not supported.")
6703 : .c_str());
6704 0 : return false;
6705 : }
6706 : }
6707 : else
6708 : {
6709 70 : sInfo.iOGRFieldIdx = poFeatureDefn->GetGeomFieldIndex(
6710 70 : osExpectedOGRFieldName.c_str());
6711 70 : if (sInfo.iOGRFieldIdx < 0)
6712 : {
6713 50 : if (pszGeomFieldName && pszGeomFieldName == sInfo.osName)
6714 : {
6715 45 : if (poFeatureDefn->GetGeomFieldCount() == 0)
6716 : {
6717 0 : CPLError(CE_Failure, CPLE_AppDefined,
6718 : "Cannot find OGR geometry field for Arrow "
6719 : "array %s",
6720 : sInfo.osName.c_str());
6721 0 : return false;
6722 : }
6723 45 : sInfo.iOGRFieldIdx = 0;
6724 : }
6725 : else
6726 : {
6727 : // Check if ARROW:extension:name = ogc.wkb or geoarrow.wkb
6728 5 : const char *pabyMetadata = schema->metadata;
6729 5 : if (pabyMetadata)
6730 : {
6731 : const auto oMetadata =
6732 5 : OGRParseArrowMetadata(pabyMetadata);
6733 5 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
6734 10 : if (oIter != oMetadata.end() &&
6735 5 : (oIter->second == EXTENSION_NAME_OGC_WKB ||
6736 0 : oIter->second == EXTENSION_NAME_GEOARROW_WKB))
6737 : {
6738 5 : if (poFeatureDefn->GetGeomFieldCount() == 0)
6739 : {
6740 0 : CPLError(CE_Failure, CPLE_AppDefined,
6741 : "Cannot find OGR geometry field "
6742 : "for Arrow array %s",
6743 : sInfo.osName.c_str());
6744 0 : return false;
6745 : }
6746 5 : sInfo.iOGRFieldIdx = 0;
6747 : }
6748 : }
6749 : }
6750 :
6751 50 : if (sInfo.iOGRFieldIdx < 0)
6752 : {
6753 0 : CPLError(CE_Failure, CPLE_AppDefined,
6754 : "Cannot find OGR field for Arrow array %s",
6755 : sInfo.osName.c_str());
6756 0 : return false;
6757 : }
6758 : }
6759 :
6760 70 : if (!IsBinary(format) && !IsLargeBinary(format))
6761 : {
6762 0 : CPLError(CE_Failure, CPLE_AppDefined,
6763 : "Geometry column '%s' should be of Arrow format "
6764 : "'z' (binary) or 'Z' (large binary)",
6765 : sInfo.osName.c_str());
6766 0 : return false;
6767 : }
6768 70 : sInfo.bIsGeomCol = true;
6769 : }
6770 : }
6771 :
6772 739 : asFieldInfo.emplace_back(std::move(sInfo));
6773 739 : return true;
6774 : }
6775 :
6776 : /************************************************************************/
6777 : /* GetUInt64Value() */
6778 : /************************************************************************/
6779 :
6780 90 : static inline uint64_t GetUInt64Value(const struct ArrowSchema *schema,
6781 : const struct ArrowArray *array,
6782 : size_t iFeature)
6783 : {
6784 90 : uint64_t nVal = 0;
6785 90 : CPLAssert(schema->format[1] == 0);
6786 90 : switch (schema->format[0])
6787 : {
6788 8 : case ARROW_LETTER_INT8:
6789 8 : nVal = GetValue<int8_t>(array, iFeature);
6790 8 : break;
6791 8 : case ARROW_LETTER_UINT8:
6792 8 : nVal = GetValue<uint8_t>(array, iFeature);
6793 8 : break;
6794 8 : case ARROW_LETTER_INT16:
6795 8 : nVal = GetValue<int16_t>(array, iFeature);
6796 8 : break;
6797 8 : case ARROW_LETTER_UINT16:
6798 8 : nVal = GetValue<uint16_t>(array, iFeature);
6799 8 : break;
6800 34 : case ARROW_LETTER_INT32:
6801 34 : nVal = GetValue<int32_t>(array, iFeature);
6802 34 : break;
6803 8 : case ARROW_LETTER_UINT32:
6804 8 : nVal = GetValue<uint32_t>(array, iFeature);
6805 8 : break;
6806 8 : case ARROW_LETTER_INT64:
6807 8 : nVal = GetValue<int64_t>(array, iFeature);
6808 8 : break;
6809 8 : case ARROW_LETTER_UINT64:
6810 8 : nVal = GetValue<uint64_t>(array, iFeature);
6811 8 : break;
6812 0 : default:
6813 : // Shouldn't happen given checks in BuildOGRFieldInfo()
6814 0 : CPLAssert(false);
6815 : break;
6816 : }
6817 90 : return nVal;
6818 : }
6819 :
6820 : /************************************************************************/
6821 : /* GetWorkingBufferSize() */
6822 : /************************************************************************/
6823 :
6824 1381750 : static size_t GetWorkingBufferSize(const struct ArrowSchema *schema,
6825 : const struct ArrowArray *array,
6826 : size_t iFeature, int &iArrowIdxInOut,
6827 : const std::vector<FieldInfo> &asFieldInfo)
6828 : {
6829 1381750 : const char *fieldName = schema->name;
6830 1381750 : const char *format = schema->format;
6831 1381750 : if (IsStructure(format))
6832 : {
6833 60156 : size_t nRet = 0;
6834 1381770 : for (int64_t i = 0; i < array->n_children; ++i)
6835 : {
6836 1321620 : nRet += GetWorkingBufferSize(
6837 1321620 : schema->children[i], array->children[i],
6838 1321620 : iFeature + static_cast<size_t>(array->offset), iArrowIdxInOut,
6839 : asFieldInfo);
6840 : }
6841 60156 : return nRet;
6842 : }
6843 1321600 : const int iArrowIdx = iArrowIdxInOut;
6844 1321600 : ++iArrowIdxInOut;
6845 :
6846 1321600 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
6847 121417 : return 0;
6848 :
6849 1200180 : const uint8_t *pabyValidity =
6850 1200180 : static_cast<const uint8_t *>(array->buffers[0]);
6851 1200290 : if (array->null_count != 0 && pabyValidity &&
6852 111 : !TestBit(pabyValidity, static_cast<size_t>(iFeature + array->offset)))
6853 : {
6854 : // empty string
6855 54 : return 0;
6856 : }
6857 :
6858 1200130 : if (asFieldInfo[iArrowIdx].bUseDictionary)
6859 : {
6860 41 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
6861 41 : const auto dictArray = array->dictionary;
6862 41 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
6863 : {
6864 1 : CPLError(CE_Failure, CPLE_AppDefined,
6865 : "Feature %" PRIu64
6866 : ", field %s: invalid dictionary index: %" PRIu64,
6867 : static_cast<uint64_t>(iFeature), fieldName, nDictIdx);
6868 1 : return 0;
6869 : }
6870 :
6871 40 : array = dictArray;
6872 40 : schema = schema->dictionary;
6873 40 : format = schema->format;
6874 40 : iFeature = static_cast<size_t>(nDictIdx);
6875 : }
6876 :
6877 1200120 : if (IsString(format))
6878 : {
6879 1200120 : const auto *panOffsets =
6880 1200120 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset;
6881 1200120 : return 1 + (panOffsets[iFeature + 1] - panOffsets[iFeature]);
6882 : }
6883 10 : else if (IsLargeString(format))
6884 : {
6885 10 : const auto *panOffsets =
6886 10 : static_cast<const uint64_t *>(array->buffers[1]) + array->offset;
6887 10 : return 1 + static_cast<size_t>(panOffsets[iFeature + 1] -
6888 10 : panOffsets[iFeature]);
6889 : }
6890 0 : return 0;
6891 : }
6892 :
6893 : /************************************************************************/
6894 : /* FillField() */
6895 : /************************************************************************/
6896 :
6897 : template <typename ArrowType, typename OGRType = ArrowType>
6898 210 : inline static void FillField(const struct ArrowArray *array, int iOGRFieldIdx,
6899 : size_t iFeature, OGRFeature &oFeature)
6900 : {
6901 210 : const auto *panValues = static_cast<const ArrowType *>(array->buffers[1]);
6902 210 : oFeature.SetFieldSameTypeUnsafe(
6903 : iOGRFieldIdx,
6904 210 : static_cast<OGRType>(panValues[iFeature + array->offset]));
6905 210 : }
6906 :
6907 : /************************************************************************/
6908 : /* FillFieldString() */
6909 : /************************************************************************/
6910 :
6911 : template <typename OffsetType>
6912 : inline static void
6913 1200130 : FillFieldString(const struct ArrowArray *array, int iOGRFieldIdx,
6914 : size_t iFeature, int iArrowIdx,
6915 : const std::vector<FieldInfo> &asFieldInfo,
6916 : std::string &osWorkingBuffer, OGRFeature &oFeature)
6917 : {
6918 1200130 : const auto *panOffsets =
6919 1200130 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
6920 1200130 : const char *pszStr = static_cast<const char *>(array->buffers[2]);
6921 1200130 : const size_t nLen =
6922 1200130 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
6923 1200130 : if (asFieldInfo[iArrowIdx].bUseStringOptim)
6924 : {
6925 1200130 : oFeature.SetFieldSameTypeUnsafe(
6926 1200130 : iOGRFieldIdx, &osWorkingBuffer[0] + osWorkingBuffer.size());
6927 1200130 : osWorkingBuffer.append(pszStr + panOffsets[iFeature], nLen);
6928 1200130 : osWorkingBuffer.push_back(0); // append null character
6929 : }
6930 : else
6931 : {
6932 0 : const std::string osTmp(pszStr, nLen);
6933 0 : oFeature.SetField(iOGRFieldIdx, osTmp.c_str());
6934 : }
6935 1200130 : }
6936 :
6937 : /************************************************************************/
6938 : /* FillFieldBinary() */
6939 : /************************************************************************/
6940 :
6941 : template <typename OffsetType>
6942 : inline static bool
6943 60093 : FillFieldBinary(const struct ArrowArray *array, int iOGRFieldIdx,
6944 : size_t iFeature, int iArrowIdx,
6945 : const std::vector<FieldInfo> &asFieldInfo,
6946 : const std::string &osFieldPrefix, const char *pszFieldName,
6947 : OGRFeature &oFeature)
6948 : {
6949 60093 : const auto *panOffsets =
6950 60093 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
6951 60093 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]) +
6952 60093 : static_cast<size_t>(panOffsets[iFeature]);
6953 60093 : const size_t nLen =
6954 60093 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
6955 60093 : if (asFieldInfo[iArrowIdx].bIsGeomCol)
6956 : {
6957 60065 : size_t nBytesConsumedOut = 0;
6958 :
6959 : // Check if we can reuse the existing geometry, to save dynamic memory
6960 : // allocations.
6961 60065 : if (nLen >= 5 && pabyData[0] == wkbNDR && pabyData[1] <= wkbTriangle &&
6962 60058 : pabyData[2] == 0 && pabyData[3] == 0 && pabyData[4] == 0)
6963 : {
6964 60058 : const auto poExistingGeom = oFeature.GetGeomFieldRef(iOGRFieldIdx);
6965 120075 : if (poExistingGeom &&
6966 60017 : poExistingGeom->getGeometryType() == pabyData[1])
6967 : {
6968 60017 : poExistingGeom->importFromWkb(pabyData, nLen, wkbVariantIso,
6969 : nBytesConsumedOut);
6970 60017 : return true;
6971 : }
6972 : }
6973 :
6974 48 : OGRGeometry *poGeometry = nullptr;
6975 48 : OGRGeometryFactory::createFromWkb(pabyData, nullptr, &poGeometry, nLen,
6976 : wkbVariantIso, nBytesConsumedOut);
6977 48 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, poGeometry);
6978 : }
6979 : else
6980 : {
6981 28 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
6982 : {
6983 0 : CPLError(CE_Failure, CPLE_NotSupported,
6984 : "Content for field %s%s is too large",
6985 : osFieldPrefix.c_str(), pszFieldName);
6986 0 : return false;
6987 : }
6988 28 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(nLen), pabyData);
6989 : }
6990 76 : return true;
6991 : }
6992 :
6993 : /************************************************************************/
6994 : /* FillFeature() */
6995 : /************************************************************************/
6996 :
6997 1321620 : static bool FillFeature(OGRLayer *poLayer, const struct ArrowSchema *schema,
6998 : const struct ArrowArray *array,
6999 : const std::string &osFieldPrefix, size_t iFeature,
7000 : int &iArrowIdxInOut,
7001 : const std::vector<FieldInfo> &asFieldInfo,
7002 : OGRFeature &oFeature, std::string &osWorkingBuffer)
7003 :
7004 : {
7005 1321620 : const char *fieldName = schema->name;
7006 1321620 : const char *format = schema->format;
7007 1321620 : if (IsStructure(format))
7008 : {
7009 38 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
7010 78 : for (int64_t i = 0; i < array->n_children; ++i)
7011 : {
7012 59 : if (!FillFeature(
7013 59 : poLayer, schema->children[i], array->children[i],
7014 59 : osNewPrefix, iFeature + static_cast<size_t>(array->offset),
7015 : iArrowIdxInOut, asFieldInfo, oFeature, osWorkingBuffer))
7016 0 : return false;
7017 : }
7018 19 : return true;
7019 : }
7020 1321600 : const int iArrowIdx = iArrowIdxInOut;
7021 1321600 : ++iArrowIdxInOut;
7022 1321600 : const int iOGRFieldIdx = asFieldInfo[iArrowIdx].iOGRFieldIdx;
7023 :
7024 1321600 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7025 : {
7026 62 : format = schema->dictionary->format;
7027 : }
7028 :
7029 1321600 : if (array->null_count != 0)
7030 : {
7031 989 : const uint8_t *pabyValidity =
7032 989 : static_cast<const uint8_t *>(array->buffers[0]);
7033 1929 : if (pabyValidity &&
7034 940 : !TestBit(pabyValidity,
7035 940 : static_cast<size_t>(iFeature + array->offset)))
7036 : {
7037 285 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7038 5 : oFeature.SetFID(OGRNullFID);
7039 280 : else if (asFieldInfo[iArrowIdx].bIsGeomCol)
7040 60 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, nullptr);
7041 220 : else if (asFieldInfo[iArrowIdx].eSetFeatureFieldType == OFTString)
7042 : {
7043 117 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7044 117 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7045 : {
7046 63 : if (IsValidField(psField))
7047 : {
7048 51 : CPLFree(psField->String);
7049 51 : OGR_RawField_SetNull(psField);
7050 : }
7051 : }
7052 : else
7053 : {
7054 54 : OGR_RawField_SetNull(psField);
7055 : }
7056 : }
7057 : else
7058 : {
7059 103 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7060 103 : switch (asFieldInfo[iArrowIdx].eSetFeatureFieldType)
7061 : {
7062 47 : case OFTRealList:
7063 : case OFTIntegerList:
7064 : case OFTInteger64List:
7065 47 : if (IsValidField(psField))
7066 47 : CPLFree(psField->IntegerList.paList);
7067 47 : break;
7068 :
7069 7 : case OFTStringList:
7070 7 : if (IsValidField(psField))
7071 7 : CSLDestroy(psField->StringList.paList);
7072 7 : break;
7073 :
7074 1 : case OFTBinary:
7075 1 : if (IsValidField(psField))
7076 1 : CPLFree(psField->Binary.paData);
7077 1 : break;
7078 :
7079 48 : default:
7080 48 : break;
7081 : }
7082 103 : OGR_RawField_SetNull(psField);
7083 : }
7084 285 : return true;
7085 : }
7086 : }
7087 :
7088 1321310 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7089 : {
7090 49 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7091 49 : auto dictArray = array->dictionary;
7092 49 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7093 : {
7094 2 : CPLError(CE_Failure, CPLE_AppDefined,
7095 : "Feature %" PRIu64
7096 : ", field %s: invalid dictionary index: %" PRIu64,
7097 : static_cast<uint64_t>(iFeature),
7098 4 : (osFieldPrefix + fieldName).c_str(), nDictIdx);
7099 2 : return false;
7100 : }
7101 47 : array = dictArray;
7102 47 : schema = schema->dictionary;
7103 47 : iFeature = static_cast<size_t>(nDictIdx);
7104 : }
7105 :
7106 1321310 : if (IsBoolean(format))
7107 : {
7108 12 : const uint8_t *pabyValues =
7109 12 : static_cast<const uint8_t *>(array->buffers[1]);
7110 12 : oFeature.SetFieldSameTypeUnsafe(
7111 : iOGRFieldIdx,
7112 12 : TestBit(pabyValues, static_cast<size_t>(iFeature + array->offset))
7113 : ? 1
7114 : : 0);
7115 12 : return true;
7116 : }
7117 1321300 : else if (IsInt8(format))
7118 : {
7119 10 : FillField<int8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7120 10 : return true;
7121 : }
7122 1321290 : else if (IsUInt8(format))
7123 : {
7124 10 : FillField<uint8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7125 10 : return true;
7126 : }
7127 1321280 : else if (IsInt16(format))
7128 : {
7129 12 : FillField<int16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7130 12 : return true;
7131 : }
7132 1321270 : else if (IsUInt16(format))
7133 : {
7134 10 : FillField<uint16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7135 10 : return true;
7136 : }
7137 1321260 : else if (IsInt32(format))
7138 : {
7139 28 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7140 : {
7141 2 : const auto *panValues =
7142 2 : static_cast<const int32_t *>(array->buffers[1]);
7143 2 : oFeature.SetFID(panValues[iFeature + array->offset]);
7144 : }
7145 : else
7146 : {
7147 26 : FillField<int32_t>(array, iOGRFieldIdx, iFeature, oFeature);
7148 : }
7149 28 : return true;
7150 : }
7151 1321230 : else if (IsUInt32(format))
7152 : {
7153 4 : FillField<uint32_t, GIntBig>(array, iOGRFieldIdx, iFeature, oFeature);
7154 4 : return true;
7155 : }
7156 1321220 : else if (IsInt64(format))
7157 : {
7158 60114 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7159 : {
7160 60046 : const auto *panValues =
7161 60046 : static_cast<const int64_t *>(array->buffers[1]);
7162 60046 : oFeature.SetFID(panValues[iFeature + array->offset]);
7163 : }
7164 : else
7165 : {
7166 68 : FillField<int64_t, GIntBig>(array, iOGRFieldIdx, iFeature,
7167 : oFeature);
7168 : }
7169 60114 : return true;
7170 : }
7171 1261110 : else if (IsUInt64(format))
7172 : {
7173 10 : FillField<uint64_t, double>(array, iOGRFieldIdx, iFeature, oFeature);
7174 10 : return true;
7175 : }
7176 1261100 : else if (IsFloat32(format))
7177 : {
7178 12 : FillField<float>(array, iOGRFieldIdx, iFeature, oFeature);
7179 12 : return true;
7180 : }
7181 1261090 : else if (IsFloat64(format))
7182 : {
7183 48 : FillField<double>(array, iOGRFieldIdx, iFeature, oFeature);
7184 48 : return true;
7185 : }
7186 1261040 : else if (IsString(format))
7187 : {
7188 1200120 : FillFieldString<uint32_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7189 : asFieldInfo, osWorkingBuffer, oFeature);
7190 1200120 : return true;
7191 : }
7192 60925 : else if (IsLargeString(format))
7193 : {
7194 10 : FillFieldString<uint64_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7195 : asFieldInfo, osWorkingBuffer, oFeature);
7196 10 : return true;
7197 : }
7198 60915 : else if (IsBinary(format))
7199 : {
7200 60077 : return FillFieldBinary<uint32_t>(array, iOGRFieldIdx, iFeature,
7201 : iArrowIdx, asFieldInfo, osFieldPrefix,
7202 60077 : fieldName, oFeature);
7203 : }
7204 838 : else if (IsLargeBinary(format))
7205 : {
7206 16 : return FillFieldBinary<uint64_t>(array, iOGRFieldIdx, iFeature,
7207 : iArrowIdx, asFieldInfo, osFieldPrefix,
7208 16 : fieldName, oFeature);
7209 : }
7210 822 : else if (asFieldInfo[iArrowIdx].nPrecision > 0)
7211 : {
7212 : // fits on a int64
7213 46 : CPLAssert(asFieldInfo[iArrowIdx].nPrecision <= 19);
7214 : // either 128 or 256 bits
7215 46 : CPLAssert((asFieldInfo[iArrowIdx].nWidthInBytes % 8) == 0);
7216 46 : const int nWidthIn64BitWord = asFieldInfo[iArrowIdx].nWidthInBytes / 8;
7217 :
7218 46 : if (IsList(format))
7219 : {
7220 16 : const auto panOffsets =
7221 16 : static_cast<const uint32_t *>(array->buffers[1]) +
7222 16 : array->offset;
7223 16 : const auto childArray = array->children[0];
7224 16 : std::vector<double> aValues;
7225 33 : for (auto i = panOffsets[iFeature]; i < panOffsets[iFeature + 1];
7226 : ++i)
7227 : {
7228 17 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7229 17 : asFieldInfo[iArrowIdx].nScale,
7230 : i));
7231 : }
7232 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7233 16 : aValues.data());
7234 16 : return true;
7235 : }
7236 30 : else if (IsLargeList(format))
7237 : {
7238 4 : const auto panOffsets =
7239 4 : static_cast<const uint64_t *>(array->buffers[1]) +
7240 4 : array->offset;
7241 4 : const auto childArray = array->children[0];
7242 4 : std::vector<double> aValues;
7243 4 : for (auto i = static_cast<size_t>(panOffsets[iFeature]);
7244 9 : i < static_cast<size_t>(panOffsets[iFeature + 1]); ++i)
7245 : {
7246 5 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7247 5 : asFieldInfo[iArrowIdx].nScale,
7248 : i));
7249 : }
7250 4 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7251 4 : aValues.data());
7252 4 : return true;
7253 : }
7254 26 : else if (IsFixedSizeList(format))
7255 : {
7256 4 : const int nVals = GetFixedSizeList(format);
7257 4 : const auto childArray = array->children[0];
7258 4 : std::vector<double> aValues;
7259 12 : for (int i = 0; i < nVals; ++i)
7260 : {
7261 8 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7262 8 : asFieldInfo[iArrowIdx].nScale,
7263 8 : iFeature * nVals + i));
7264 : }
7265 4 : oFeature.SetField(iOGRFieldIdx, nVals, aValues.data());
7266 4 : return true;
7267 : }
7268 :
7269 22 : CPLAssert(format[0] == ARROW_LETTER_DECIMAL);
7270 :
7271 22 : oFeature.SetFieldSameTypeUnsafe(
7272 : iOGRFieldIdx,
7273 : GetValueDecimal(array, nWidthIn64BitWord,
7274 22 : asFieldInfo[iArrowIdx].nScale, iFeature));
7275 22 : return true;
7276 : }
7277 776 : else if (SetFieldForOtherFormats(
7278 : oFeature, iOGRFieldIdx,
7279 776 : static_cast<size_t>(iFeature + array->offset), schema, array))
7280 : {
7281 776 : return true;
7282 : }
7283 :
7284 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
7285 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
7286 0 : fieldName + " is not supported.")
7287 : .c_str());
7288 0 : return false;
7289 : }
7290 :
7291 : /************************************************************************/
7292 : /* OGRLayer::WriteArrowBatch() */
7293 : /************************************************************************/
7294 :
7295 : // clang-format off
7296 : /** Writes a batch of rows from an ArrowArray.
7297 : *
7298 : * This is semantically close to calling CreateFeature() with multiple features
7299 : * at once.
7300 : *
7301 : * The ArrowArray must be of type struct (format=+s), and its children generally
7302 : * map to a OGR attribute or geometry field (unless they are struct themselves).
7303 : *
7304 : * Method IsArrowSchemaSupported() can be called to determine if the schema
7305 : * will be supported by WriteArrowBatch().
7306 : *
7307 : * OGR fields for the corresponding children arrays must exist and be of a
7308 : * compatible type. For attribute fields, they should generally be created with
7309 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
7310 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
7311 : * they should be created either implicitly at CreateLayer() time
7312 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
7313 : *
7314 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
7315 : * implementation of WriteArrowBatch() for scenarios that involve appending to
7316 : * an already existing output layer when the input Arrow field type and the
7317 : * OGR layer field type are 32/64-bit integers or real numbers, but do not match
7318 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
7319 : * can be used to control the behavior in case of lossy conversion.
7320 : *
7321 : * Arrays for geometry columns should be of binary or large binary type and
7322 : * contain WKB geometry.
7323 : *
7324 : * Note that the passed array may be set to a released state
7325 : * (array->release==NULL) after this call (not by the base implementation,
7326 : * but in specialized ones such as Parquet or Arrow for example)
7327 : *
7328 : * Supported options of the base implementation are:
7329 : * <ul>
7330 : * <li>FID=name. Name of the FID column in the array. If not provided,
7331 : * GetFIDColumn() is used to determine it. The special name
7332 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
7333 : * GetFIDColumn() are set.
7334 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
7335 : * On input, values of the FID column are used to create the feature.
7336 : * On output, the values of the FID column may be set with the FID of the
7337 : * created feature (if the array is not released).
7338 : * </li>
7339 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
7340 : * input FID is not preserved in the output layer. The default is NOTHING.
7341 : * Setting it to ERROR will cause the function to error out. Setting it
7342 : * to WARNING will cause the function to emit a warning but continue its
7343 : * processing.
7344 : * </li>
7345 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
7346 : * Action to perform when the input field value is not preserved in the
7347 : * output layer.
7348 : * The default is WARNING, which will cause the function to emit a warning
7349 : * but continue its processing.
7350 : * Setting it to ERROR will cause the function to error out if a lossy
7351 : * conversion is detected.
7352 : * </li>
7353 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
7354 : * GetGeometryColumn() is used. The special name
7355 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
7356 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
7357 : * Geometry columns are also identified if they have
7358 : * ARROW:extension:name=ogc.wkb as a field metadata.
7359 : * The corresponding ArrowArray must be of type binary (w) or large
7360 : * binary (W).
7361 : * </li>
7362 : * </ul>
7363 : *
7364 : * The following example demonstrates how to copy a layer from one format to
7365 : * another one (assuming it has at most a single geometry column):
7366 : \code{.py}
7367 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
7368 : stream = src_lyr.GetArrowStream()
7369 : schema = stream.GetSchema()
7370 :
7371 : # If the source layer has a FID column and the output driver supports
7372 : # a FID layer creation option, set it to the source FID column name.
7373 : if src_lyr.GetFIDColumn():
7374 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
7375 : "DS_LAYER_CREATIONOPTIONLIST"
7376 : )
7377 : if creationOptions and '"FID"' in creationOptions:
7378 : lcos["FID"] = src_lyr.GetFIDColumn()
7379 :
7380 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
7381 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
7382 : out_lyr = out_ds.CreateLayer(
7383 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
7384 : )
7385 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
7386 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
7387 : else:
7388 : out_lyr = out_ds.CreateLayer(
7389 : src_lyr.GetName(),
7390 : geom_type=src_lyr.GetGeomType(),
7391 : srs=src_lyr.GetSpatialRef(),
7392 : options=lcos,
7393 : )
7394 :
7395 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
7396 : assert success, error_msg
7397 :
7398 : src_geom_field_names = [
7399 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
7400 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
7401 : ]
7402 : for i in range(schema.GetChildrenCount()):
7403 : # GetArrowStream() may return "OGC_FID" for a unnamed source FID
7404 : # column and "wkb_geometry" for a unnamed source geometry column.
7405 : # Also test GetFIDColumn() and src_geom_field_names if they are
7406 : # named.
7407 : if (
7408 : schema.GetChild(i).GetName()
7409 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
7410 : and schema.GetChild(i).GetName() not in src_geom_field_names
7411 : ):
7412 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
7413 :
7414 : write_options = []
7415 : if src_lyr.GetFIDColumn():
7416 : write_options.append("FID=" + src_lyr.GetFIDColumn())
7417 : if (
7418 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
7419 : and src_lyr.GetGeometryColumn()
7420 : ):
7421 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
7422 :
7423 : while True:
7424 : array = stream.GetNextRecordBatch()
7425 : if array is None:
7426 : break
7427 : out_lyr.WriteArrowBatch(schema, array, write_options)
7428 : \endcode
7429 : *
7430 : * This method and CreateFeature() are mutually exclusive in the same session.
7431 : *
7432 : * This method is the same as the C function OGR_L_WriteArrowBatch().
7433 : *
7434 : * @param schema Schema of array
7435 : * @param array Array of type struct. It may be released (array->release==NULL)
7436 : * after calling this method.
7437 : * @param papszOptions Options. Null terminated list, or nullptr.
7438 : * @return true in case of success
7439 : * @since 3.8
7440 : */
7441 : // clang-format on
7442 :
7443 75 : bool OGRLayer::WriteArrowBatch(const struct ArrowSchema *schema,
7444 : struct ArrowArray *array,
7445 : CSLConstList papszOptions)
7446 : {
7447 75 : const char *format = schema->format;
7448 75 : if (!IsStructure(format))
7449 : {
7450 0 : CPLError(CE_Failure, CPLE_AppDefined,
7451 : "WriteArrowBatch() should be called on a schema that is a "
7452 : "struct of fields");
7453 0 : return false;
7454 : }
7455 :
7456 75 : if (schema->n_children != array->n_children)
7457 : {
7458 0 : CPLError(CE_Failure, CPLE_AppDefined,
7459 : "WriteArrowBatch(): schema->n_children (%d) != "
7460 : "array->n_children (%d)",
7461 0 : int(schema->n_children), int(array->n_children));
7462 0 : return false;
7463 : }
7464 :
7465 150 : CPLStringList aosNativeTypes;
7466 75 : auto poDS = const_cast<OGRLayer *>(this)->GetDataset();
7467 75 : if (poDS)
7468 : {
7469 75 : auto poDriver = poDS->GetDriver();
7470 75 : if (poDriver)
7471 : {
7472 : const char *pszMetadataItem =
7473 75 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
7474 75 : if (pszMetadataItem)
7475 75 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
7476 : }
7477 : }
7478 :
7479 150 : std::vector<FieldInfo> asFieldInfo;
7480 75 : auto poLayerDefn = GetLayerDefn();
7481 : const char *pszFIDName =
7482 75 : CSLFetchNameValueDef(papszOptions, "FID", GetFIDColumn());
7483 75 : if (!pszFIDName || pszFIDName[0] == 0)
7484 56 : pszFIDName = DEFAULT_ARROW_FID_NAME;
7485 : const bool bErrorIfFIDNotPreserved =
7486 75 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
7487 : "ERROR");
7488 : const bool bWarningIfFIDNotPreserved =
7489 75 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
7490 : "WARNING");
7491 : const bool bErrorIfFieldNotPreserved =
7492 75 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FIELD_NOT_PRESERVED", ""),
7493 : "ERROR");
7494 75 : const char *pszGeomFieldName = CSLFetchNameValueDef(
7495 75 : papszOptions, "GEOMETRY_NAME", GetGeometryColumn());
7496 75 : if (!pszGeomFieldName || pszGeomFieldName[0] == 0)
7497 54 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
7498 75 : const struct ArrowSchema *schemaFIDColumn = nullptr;
7499 75 : struct ArrowArray *arrayFIDColumn = nullptr;
7500 75 : bool bFallbackTypesUsed = false;
7501 793 : for (int64_t i = 0; i < schema->n_children; ++i)
7502 : {
7503 719 : if (!BuildOGRFieldInfo(schema->children[i], array->children[i],
7504 719 : poLayerDefn, std::string(), aosNativeTypes,
7505 : bFallbackTypesUsed, asFieldInfo, pszFIDName,
7506 : pszGeomFieldName, this,
7507 719 : m_poPrivate->m_oMapArrowFieldNameToOGRFieldName,
7508 : schemaFIDColumn, arrayFIDColumn))
7509 : {
7510 1 : return false;
7511 : }
7512 : }
7513 :
7514 148 : std::map<int, int> oMapOGRFieldIndexToFieldInfoIndex;
7515 148 : std::vector<bool> abUseStringOptim(poLayerDefn->GetFieldCount(), false);
7516 812 : for (int i = 0; i < static_cast<int>(asFieldInfo.size()); ++i)
7517 : {
7518 738 : if (asFieldInfo[i].iOGRFieldIdx >= 0 && !asFieldInfo[i].bIsGeomCol)
7519 : {
7520 642 : CPLAssert(oMapOGRFieldIndexToFieldInfoIndex.find(
7521 : asFieldInfo[i].iOGRFieldIdx) ==
7522 : oMapOGRFieldIndexToFieldInfoIndex.end());
7523 642 : oMapOGRFieldIndexToFieldInfoIndex[asFieldInfo[i].iOGRFieldIdx] = i;
7524 1284 : abUseStringOptim[asFieldInfo[i].iOGRFieldIdx] =
7525 1284 : asFieldInfo[i].bUseStringOptim;
7526 : }
7527 : }
7528 :
7529 148 : OGRFeatureDefn oLayerDefnTmp(poLayerDefn->GetName());
7530 :
7531 : struct LayerDefnTmpRefReleaser
7532 : {
7533 : OGRFeatureDefn &m_oDefn;
7534 :
7535 74 : explicit LayerDefnTmpRefReleaser(OGRFeatureDefn &oDefn) : m_oDefn(oDefn)
7536 : {
7537 74 : m_oDefn.Reference();
7538 74 : }
7539 :
7540 74 : ~LayerDefnTmpRefReleaser()
7541 74 : {
7542 74 : m_oDefn.Dereference();
7543 74 : }
7544 : };
7545 :
7546 148 : LayerDefnTmpRefReleaser oLayerDefnTmpRefReleaser(oLayerDefnTmp);
7547 :
7548 148 : std::vector<int> anIdentityFieldMap;
7549 74 : if (bFallbackTypesUsed)
7550 : {
7551 27 : oLayerDefnTmp.SetGeomType(wkbNone);
7552 93 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
7553 : {
7554 66 : anIdentityFieldMap.push_back(i);
7555 66 : const auto poSrcFieldDefn = poLayerDefn->GetFieldDefn(i);
7556 66 : const auto oIter = oMapOGRFieldIndexToFieldInfoIndex.find(i);
7557 : OGRFieldDefn oFieldDefn(
7558 : poSrcFieldDefn->GetNameRef(),
7559 66 : oIter == oMapOGRFieldIndexToFieldInfoIndex.end()
7560 0 : ? poSrcFieldDefn->GetType()
7561 132 : : asFieldInfo[oIter->second].eNominalFieldType);
7562 66 : if (oIter != oMapOGRFieldIndexToFieldInfoIndex.end())
7563 66 : asFieldInfo[oIter->second].eSetFeatureFieldType =
7564 66 : asFieldInfo[oIter->second].eNominalFieldType;
7565 66 : oLayerDefnTmp.AddFieldDefn(&oFieldDefn);
7566 : }
7567 54 : for (int i = 0; i < poLayerDefn->GetGeomFieldCount(); ++i)
7568 : {
7569 27 : oLayerDefnTmp.AddGeomFieldDefn(poLayerDefn->GetGeomFieldDefn(i));
7570 : }
7571 : }
7572 : else
7573 : {
7574 690 : for (auto &sFieldInfo : asFieldInfo)
7575 643 : sFieldInfo.eSetFeatureFieldType = sFieldInfo.eTargetFieldType;
7576 : }
7577 :
7578 : struct FeatureCleaner
7579 : {
7580 : OGRFeature &m_oFeature;
7581 : const std::vector<bool> &m_abUseStringOptim;
7582 :
7583 74 : explicit FeatureCleaner(OGRFeature &oFeature,
7584 : const std::vector<bool> &abUseStringOptim)
7585 74 : : m_oFeature(oFeature), m_abUseStringOptim(abUseStringOptim)
7586 : {
7587 74 : }
7588 :
7589 : // As we set a value that can't be CPLFree()'d in the .String member
7590 : // of string fields, we must take care of manually unsetting it before
7591 : // the destructor of OGRFeature gets called.
7592 74 : ~FeatureCleaner()
7593 74 : {
7594 74 : const auto poLayerDefn = m_oFeature.GetDefnRef();
7595 74 : const int nFieldCount = poLayerDefn->GetFieldCount();
7596 719 : for (int i = 0; i < nFieldCount; ++i)
7597 : {
7598 645 : if (m_abUseStringOptim[i])
7599 : {
7600 123 : if (m_oFeature.IsFieldSetAndNotNullUnsafe(i))
7601 95 : m_oFeature.SetFieldSameTypeUnsafe(
7602 : i, static_cast<char *>(nullptr));
7603 : }
7604 : }
7605 74 : }
7606 : };
7607 :
7608 148 : OGRFeature oFeature(bFallbackTypesUsed ? &oLayerDefnTmp : poLayerDefn);
7609 148 : FeatureCleaner oCleaner(oFeature, abUseStringOptim);
7610 148 : OGRFeature oFeatureTarget(poLayerDefn);
7611 74 : OGRFeature *const poFeatureTarget =
7612 74 : bFallbackTypesUsed ? &oFeatureTarget : &oFeature;
7613 :
7614 : // We accumulate the content of all strings in osWorkingBuffer to avoid
7615 : // a few dynamic memory allocations
7616 148 : std::string osWorkingBuffer;
7617 :
7618 : bool bTransactionOK;
7619 : {
7620 74 : CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler);
7621 74 : bTransactionOK = StartTransaction() == OGRERR_NONE;
7622 : }
7623 :
7624 148 : const std::string emptyString;
7625 74 : int64_t fidNullCount = 0;
7626 60201 : for (size_t iFeature = 0; iFeature < static_cast<size_t>(array->length);
7627 : ++iFeature)
7628 : {
7629 60137 : oFeature.SetFID(OGRNullFID);
7630 :
7631 60137 : int iArrowIdx = 0;
7632 60137 : const size_t nWorkingBufferSize = GetWorkingBufferSize(
7633 : schema, array, iFeature, iArrowIdx, asFieldInfo);
7634 60137 : osWorkingBuffer.clear();
7635 60137 : osWorkingBuffer.reserve(nWorkingBufferSize);
7636 : #ifdef DEBUG
7637 60137 : const char *pszWorkingBuffer = osWorkingBuffer.c_str();
7638 60137 : CPL_IGNORE_RET_VAL(pszWorkingBuffer);
7639 : #endif
7640 60137 : iArrowIdx = 0;
7641 1381690 : for (int64_t i = 0; i < schema->n_children; ++i)
7642 : {
7643 1321560 : if (!FillFeature(this, schema->children[i], array->children[i],
7644 : emptyString, iFeature, iArrowIdx, asFieldInfo,
7645 : oFeature, osWorkingBuffer))
7646 : {
7647 2 : if (bTransactionOK)
7648 2 : RollbackTransaction();
7649 10 : return false;
7650 : }
7651 : }
7652 : #ifdef DEBUG
7653 : // Check that the buffer didn't get reallocated
7654 60135 : CPLAssert(pszWorkingBuffer == osWorkingBuffer.c_str());
7655 60135 : CPLAssert(osWorkingBuffer.size() == nWorkingBufferSize);
7656 : #endif
7657 :
7658 60135 : if (bFallbackTypesUsed)
7659 : {
7660 36 : oFeatureTarget.SetFrom(&oFeature, anIdentityFieldMap.data(),
7661 : /*bForgiving=*/true,
7662 : /*bUseISO8601ForDateTimeAsString=*/true);
7663 36 : oFeatureTarget.SetFID(oFeature.GetFID());
7664 :
7665 36 : if (bErrorIfFieldNotPreserved)
7666 : {
7667 26 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
7668 : {
7669 16 : if (!oFeature.IsFieldSetAndNotNullUnsafe(i))
7670 : {
7671 4 : continue;
7672 : }
7673 12 : bool bLossyConversion = false;
7674 : const auto eSrcType =
7675 12 : oLayerDefnTmp.GetFieldDefnUnsafe(i)->GetType();
7676 : const auto eDstType =
7677 12 : poLayerDefn->GetFieldDefnUnsafe(i)->GetType();
7678 :
7679 : const auto IsDoubleCastToInt64EqualTInt64 =
7680 2 : [](double dfVal, int64_t nOtherVal)
7681 : {
7682 : // Values in the range [INT64_MAX - 1023, INT64_MAX - 1]
7683 : // get converted to a double that once cast to int64_t
7684 : // is INT64_MAX + 1, hence the strict < comparison
7685 : return dfVal >=
7686 2 : static_cast<double>(
7687 2 : std::numeric_limits<int64_t>::min()) &&
7688 : dfVal <
7689 2 : static_cast<double>(
7690 4 : std::numeric_limits<int64_t>::max()) &&
7691 3 : static_cast<int64_t>(dfVal) == nOtherVal;
7692 : };
7693 :
7694 14 : if (eSrcType == OFTInteger64 && eDstType == OFTInteger &&
7695 2 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
7696 2 : oFeature.GetFieldAsInteger64Unsafe(i))
7697 : {
7698 1 : bLossyConversion = true;
7699 : }
7700 14 : else if (eSrcType == OFTReal && eDstType == OFTInteger &&
7701 3 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
7702 3 : oFeature.GetFieldAsDoubleUnsafe(i))
7703 : {
7704 2 : bLossyConversion = true;
7705 : }
7706 12 : else if (eSrcType == OFTReal && eDstType == OFTInteger64 &&
7707 3 : static_cast<double>(
7708 3 : oFeatureTarget.GetFieldAsInteger64Unsafe(i)) !=
7709 3 : oFeature.GetFieldAsDoubleUnsafe(i))
7710 : {
7711 2 : bLossyConversion = true;
7712 : }
7713 9 : else if (eSrcType == OFTInteger64 && eDstType == OFTReal &&
7714 2 : !IsDoubleCastToInt64EqualTInt64(
7715 : oFeatureTarget.GetFieldAsDoubleUnsafe(i),
7716 2 : oFeature.GetFieldAsInteger64Unsafe(i)))
7717 : {
7718 1 : bLossyConversion = true;
7719 : }
7720 12 : if (bLossyConversion)
7721 : {
7722 6 : CPLError(CE_Failure, CPLE_AppDefined,
7723 : "For feature " CPL_FRMT_GIB
7724 : ", value of field %s cannot not preserved",
7725 : oFeatureTarget.GetFID(),
7726 : oLayerDefnTmp.GetFieldDefn(i)->GetNameRef());
7727 6 : if (bTransactionOK)
7728 6 : RollbackTransaction();
7729 6 : return false;
7730 : }
7731 : }
7732 : }
7733 : }
7734 :
7735 60129 : const auto nInputFID = poFeatureTarget->GetFID();
7736 60129 : if (CreateFeature(poFeatureTarget) != OGRERR_NONE)
7737 : {
7738 1 : if (bTransactionOK)
7739 1 : RollbackTransaction();
7740 1 : return false;
7741 : }
7742 60128 : if (nInputFID != OGRNullFID)
7743 : {
7744 120075 : if (bWarningIfFIDNotPreserved &&
7745 : // cppcheck-suppress knownConditionTrueFalse
7746 60028 : poFeatureTarget->GetFID() != nInputFID)
7747 : {
7748 2 : CPLError(CE_Warning, CPLE_AppDefined,
7749 : "Feature id " CPL_FRMT_GIB " not preserved",
7750 : nInputFID);
7751 : }
7752 60046 : else if (bErrorIfFIDNotPreserved &&
7753 : // cppcheck-suppress knownConditionTrueFalse
7754 1 : poFeatureTarget->GetFID() != nInputFID)
7755 : {
7756 1 : CPLError(CE_Failure, CPLE_AppDefined,
7757 : "Feature id " CPL_FRMT_GIB " not preserved",
7758 : nInputFID);
7759 1 : if (bTransactionOK)
7760 1 : RollbackTransaction();
7761 1 : return false;
7762 : }
7763 : }
7764 :
7765 60127 : if (arrayFIDColumn)
7766 : {
7767 60051 : uint8_t *pabyValidity = static_cast<uint8_t *>(
7768 60051 : const_cast<void *>(arrayFIDColumn->buffers[0]));
7769 60051 : if (IsInt32(schemaFIDColumn->format))
7770 : {
7771 6 : auto *panValues = static_cast<int32_t *>(
7772 6 : const_cast<void *>(arrayFIDColumn->buffers[1]));
7773 6 : if (poFeatureTarget->GetFID() >
7774 6 : std::numeric_limits<int32_t>::max())
7775 : {
7776 0 : if (pabyValidity)
7777 : {
7778 0 : ++fidNullCount;
7779 0 : UnsetBit(pabyValidity,
7780 0 : static_cast<size_t>(iFeature +
7781 0 : arrayFIDColumn->offset));
7782 : }
7783 0 : CPLError(CE_Warning, CPLE_AppDefined,
7784 : "FID " CPL_FRMT_GIB
7785 : " cannot be stored in FID array of type int32",
7786 : poFeatureTarget->GetFID());
7787 : }
7788 : else
7789 : {
7790 6 : if (pabyValidity)
7791 : {
7792 5 : SetBit(pabyValidity,
7793 5 : static_cast<size_t>(iFeature +
7794 5 : arrayFIDColumn->offset));
7795 : }
7796 6 : panValues[iFeature + arrayFIDColumn->offset] =
7797 6 : static_cast<int32_t>(poFeatureTarget->GetFID());
7798 : }
7799 : }
7800 60045 : else if (IsInt64(schemaFIDColumn->format))
7801 : {
7802 60045 : if (pabyValidity)
7803 : {
7804 0 : SetBit(
7805 : pabyValidity,
7806 0 : static_cast<size_t>(iFeature + arrayFIDColumn->offset));
7807 : }
7808 60045 : auto *panValues = static_cast<int64_t *>(
7809 60045 : const_cast<void *>(arrayFIDColumn->buffers[1]));
7810 60045 : panValues[iFeature + arrayFIDColumn->offset] =
7811 60045 : poFeatureTarget->GetFID();
7812 : }
7813 : else
7814 : {
7815 0 : CPLAssert(false);
7816 : }
7817 : }
7818 : }
7819 64 : if (arrayFIDColumn && arrayFIDColumn->buffers[0])
7820 : {
7821 1 : arrayFIDColumn->null_count = fidNullCount;
7822 : }
7823 :
7824 64 : bool bRet = true;
7825 64 : if (bTransactionOK)
7826 58 : bRet = CommitTransaction() == OGRERR_NONE;
7827 :
7828 64 : return bRet;
7829 : }
7830 :
7831 : /************************************************************************/
7832 : /* OGR_L_WriteArrowBatch() */
7833 : /************************************************************************/
7834 :
7835 : // clang-format off
7836 : /** Writes a batch of rows from an ArrowArray.
7837 : *
7838 : * This is semantically close to calling CreateFeature() with multiple features
7839 : * at once.
7840 : *
7841 : * The ArrowArray must be of type struct (format=+s), and its children generally
7842 : * map to a OGR attribute or geometry field (unless they are struct themselves).
7843 : *
7844 : * Method IsArrowSchemaSupported() can be called to determine if the schema
7845 : * will be supported by WriteArrowBatch().
7846 : *
7847 : * OGR fields for the corresponding children arrays must exist and be of a
7848 : * compatible type. For attribute fields, they should generally be created with
7849 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
7850 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
7851 : * they should be created either implicitly at CreateLayer() time
7852 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
7853 : *
7854 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
7855 : * implementation of WriteArrowBatch() for scenarios that involve appending to
7856 : * an already existing output layer when the input Arrow field type and the
7857 : * OGR layer field type are 32/64-bit integers or real numbers, but do not match
7858 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
7859 : * can be used to control the behavior in case of lossy conversion.
7860 : *
7861 : * Arrays for geometry columns should be of binary or large binary type and
7862 : * contain WKB geometry.
7863 : *
7864 : * Note that the passed array may be set to a released state
7865 : * (array->release==NULL) after this call (not by the base implementation,
7866 : * but in specialized ones such as Parquet or Arrow for example)
7867 : *
7868 : * Supported options of the base implementation are:
7869 : * <ul>
7870 : * <li>FID=name. Name of the FID column in the array. If not provided,
7871 : * GetFIDColumn() is used to determine it. The special name
7872 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
7873 : * GetFIDColumn() are set.
7874 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
7875 : * On input, values of the FID column are used to create the feature.
7876 : * On output, the values of the FID column may be set with the FID of the
7877 : * created feature (if the array is not released).
7878 : * </li>
7879 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
7880 : * input FID is not preserved in the output layer. The default is NOTHING.
7881 : * Setting it to ERROR will cause the function to error out. Setting it
7882 : * to WARNING will cause the function to emit a warning but continue its
7883 : * processing.
7884 : * </li>
7885 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
7886 : * Action to perform when the input field value is not preserved in the
7887 : * output layer.
7888 : * The default is WARNING, which will cause the function to emit a warning
7889 : * but continue its processing.
7890 : * Setting it to ERROR will cause the function to error out if a lossy
7891 : * conversion is detected.
7892 : * </li>
7893 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
7894 : * GetGeometryColumn() is used. The special name
7895 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
7896 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
7897 : * Geometry columns are also identified if they have
7898 : * ARROW:extension:name=ogc.wkb as a field metadata.
7899 : * The corresponding ArrowArray must be of type binary (w) or large
7900 : * binary (W).
7901 : * </li>
7902 : * </ul>
7903 : *
7904 : * The following example demonstrates how to copy a layer from one format to
7905 : * another one (assuming it has at most a single geometry column):
7906 : \code{.py}
7907 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
7908 : stream = src_lyr.GetArrowStream()
7909 : schema = stream.GetSchema()
7910 :
7911 : # If the source layer has a FID column and the output driver supports
7912 : # a FID layer creation option, set it to the source FID column name.
7913 : if src_lyr.GetFIDColumn():
7914 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
7915 : "DS_LAYER_CREATIONOPTIONLIST"
7916 : )
7917 : if creationOptions and '"FID"' in creationOptions:
7918 : lcos["FID"] = src_lyr.GetFIDColumn()
7919 :
7920 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
7921 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
7922 : out_lyr = out_ds.CreateLayer(
7923 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
7924 : )
7925 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
7926 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
7927 : else:
7928 : out_lyr = out_ds.CreateLayer(
7929 : src_lyr.GetName(),
7930 : geom_type=src_lyr.GetGeomType(),
7931 : srs=src_lyr.GetSpatialRef(),
7932 : options=lcos,
7933 : )
7934 :
7935 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
7936 : assert success, error_msg
7937 :
7938 : src_geom_field_names = [
7939 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
7940 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
7941 : ]
7942 : for i in range(schema.GetChildrenCount()):
7943 : # GetArrowStream() may return "OGC_FID" for a unnamed source FID
7944 : # column and "wkb_geometry" for a unnamed source geometry column.
7945 : # Also test GetFIDColumn() and src_geom_field_names if they are
7946 : # named.
7947 : if (
7948 : schema.GetChild(i).GetName()
7949 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
7950 : and schema.GetChild(i).GetName() not in src_geom_field_names
7951 : ):
7952 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
7953 :
7954 : write_options = []
7955 : if src_lyr.GetFIDColumn():
7956 : write_options.append("FID=" + src_lyr.GetFIDColumn())
7957 : if (
7958 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
7959 : and src_lyr.GetGeometryColumn()
7960 : ):
7961 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
7962 :
7963 : while True:
7964 : array = stream.GetNextRecordBatch()
7965 : if array is None:
7966 : break
7967 : out_lyr.WriteArrowBatch(schema, array, write_options)
7968 : \endcode
7969 : *
7970 : * This method and CreateFeature() are mutually exclusive in the same session.
7971 : *
7972 : * This method is the same as the C++ method OGRLayer::WriteArrowBatch().
7973 : *
7974 : * @param hLayer Layer.
7975 : * @param schema Schema of array.
7976 : * @param array Array of type struct. It may be released (array->release==NULL)
7977 : * after calling this method.
7978 : * @param papszOptions Options. Null terminated list, or nullptr.
7979 : * @return true in case of success
7980 : * @since 3.8
7981 : */
7982 : // clang-format on
7983 :
7984 58 : bool OGR_L_WriteArrowBatch(OGRLayerH hLayer, const struct ArrowSchema *schema,
7985 : struct ArrowArray *array, char **papszOptions)
7986 : {
7987 58 : VALIDATE_POINTER1(hLayer, __func__, false);
7988 58 : VALIDATE_POINTER1(schema, __func__, false);
7989 58 : VALIDATE_POINTER1(array, __func__, false);
7990 :
7991 116 : return OGRLayer::FromHandle(hLayer)->WriteArrowBatch(schema, array,
7992 58 : papszOptions);
7993 : }
|