Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: OpenGIS Simple Features Reference Implementation
4 : * Purpose: Parts of OGRLayer dealing with Arrow C interface
5 : * Author: Even Rouault, <even dot rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022-2023, Even Rouault <even dot rouault at spatialys.com>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #include "ogrsf_frmts.h"
14 : #include "ogr_api.h"
15 : #include "ogr_recordbatch.h"
16 : #include "ograrrowarrayhelper.h"
17 : #include "ogrlayerarrow.h"
18 : #include "ogr_p.h"
19 : #include "ogr_swq.h"
20 : #include "ogr_wkb.h"
21 : #include "ogr_p.h"
22 : #include "ogrlayer_private.h"
23 :
24 : #include "cpl_float.h"
25 : #include "cpl_json.h"
26 : #include "cpl_time.h"
27 :
28 : #include <algorithm>
29 : #include <cassert>
30 : #include <cinttypes>
31 : #include <limits>
32 : #include <utility>
33 : #include <set>
34 :
// Keys of the ArrowSchema metadata entries that OGRLayer::GetArrowSchema()
// attaches to field schemas to carry OGR-specific field properties.
constexpr const char *MD_GDAL_OGR_TYPE = "GDAL:OGR:type";
constexpr const char *MD_GDAL_OGR_ALTERNATIVE_NAME =
    "GDAL:OGR:alternative_name";
constexpr const char *MD_GDAL_OGR_COMMENT = "GDAL:OGR:comment";
constexpr const char *MD_GDAL_OGR_DEFAULT = "GDAL:OGR:default";
constexpr const char *MD_GDAL_OGR_SUBTYPE = "GDAL:OGR:subtype";
constexpr const char *MD_GDAL_OGR_WIDTH = "GDAL:OGR:width";
constexpr const char *MD_GDAL_OGR_UNIQUE = "GDAL:OGR:unique";
constexpr const char *MD_GDAL_OGR_DOMAIN_NAME = "GDAL:OGR:domain_name";

// First character of the Arrow format strings tested by the Is*() helpers
// of this file.
constexpr char ARROW_LETTER_BOOLEAN = 'b';
constexpr char ARROW_LETTER_INT8 = 'c';
constexpr char ARROW_LETTER_UINT8 = 'C';
constexpr char ARROW_LETTER_INT16 = 's';
constexpr char ARROW_LETTER_UINT16 = 'S';
constexpr char ARROW_LETTER_INT32 = 'i';
constexpr char ARROW_LETTER_UINT32 = 'I';
constexpr char ARROW_LETTER_INT64 = 'l';
constexpr char ARROW_LETTER_UINT64 = 'L';
constexpr char ARROW_LETTER_FLOAT16 = 'e';
constexpr char ARROW_LETTER_FLOAT32 = 'f';
constexpr char ARROW_LETTER_FLOAT64 = 'g';
constexpr char ARROW_LETTER_STRING = 'u';
constexpr char ARROW_LETTER_LARGE_STRING = 'U';
constexpr char ARROW_LETTER_BINARY = 'z';
constexpr char ARROW_LETTER_LARGE_BINARY = 'Z';
constexpr char ARROW_LETTER_DECIMAL = 'd';

// Second character, following '+', of the list and large list format
// strings.
constexpr char ARROW_2ND_LETTER_LIST = 'l';
constexpr char ARROW_2ND_LETTER_LARGE_LIST = 'L';
64 :
/** Return whether format is exactly the "+s" format string. */
static inline bool IsStructure(const char *format)
{
    return strcmp(format, "+s") == 0;
}
69 :
/** Return whether format is exactly the "+m" format string. */
static inline bool IsMap(const char *format)
{
    return strcmp(format, "+m") == 0;
}
74 :
/** Return whether format starts with the "w:" prefix. */
static inline bool IsFixedWidthBinary(const char *format)
{
    return strncmp(format, "w:", 2) == 0;
}
79 :
/** Return the integer that follows the "w:" prefix of format, as parsed by
 * atoi().
 *
 * The prefix itself is not checked here: callers are expected to have
 * tested the format with IsFixedWidthBinary() first.
 */
static inline int GetFixedWithBinary(const char *format)
{
    const char *pszWidth = format + strlen("w:");
    return atoi(pszWidth);
}
84 :
85 30331 : static inline bool IsList(const char *format)
86 : {
87 36452 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LIST &&
88 36452 : format[2] == 0;
89 : }
90 :
91 20244 : static inline bool IsLargeList(const char *format)
92 : {
93 20368 : return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LARGE_LIST &&
94 20368 : format[2] == 0;
95 : }
96 :
/** Return whether format starts with the "+w:" prefix. */
static inline bool IsFixedSizeList(const char *format)
{
    return strncmp(format, "+w:", 3) == 0;
}
101 :
/** Return the integer that follows the "+w:" prefix of format, as parsed by
 * atoi().
 *
 * The prefix itself is not checked here: callers are expected to have
 * tested the format with IsFixedSizeList() first.
 */
static inline int GetFixedSizeList(const char *format)
{
    const char *pszSize = format + strlen("+w:");
    return atoi(pszSize);
}
106 :
107 2792 : static inline bool IsDecimal(const char *format)
108 : {
109 2792 : return format[0] == ARROW_LETTER_DECIMAL && format[1] == ':';
110 : }
111 :
112 1342700 : static inline bool IsBoolean(const char *format)
113 : {
114 1342700 : return format[0] == ARROW_LETTER_BOOLEAN && format[1] == 0;
115 : }
116 :
117 1339450 : static inline bool IsInt8(const char *format)
118 : {
119 1339450 : return format[0] == ARROW_LETTER_INT8 && format[1] == 0;
120 : }
121 :
122 1339580 : static inline bool IsUInt8(const char *format)
123 : {
124 1339580 : return format[0] == ARROW_LETTER_UINT8 && format[1] == 0;
125 : }
126 :
127 1338160 : static inline bool IsInt16(const char *format)
128 : {
129 1338160 : return format[0] == ARROW_LETTER_INT16 && format[1] == 0;
130 : }
131 :
132 1338260 : static inline bool IsUInt16(const char *format)
133 : {
134 1338260 : return format[0] == ARROW_LETTER_UINT16 && format[1] == 0;
135 : }
136 :
137 1397780 : static inline bool IsInt32(const char *format)
138 : {
139 1397780 : return format[0] == ARROW_LETTER_INT32 && format[1] == 0;
140 : }
141 :
142 1336340 : static inline bool IsUInt32(const char *format)
143 : {
144 1336340 : return format[0] == ARROW_LETTER_UINT32 && format[1] == 0;
145 : }
146 :
147 1390210 : static inline bool IsInt64(const char *format)
148 : {
149 1390210 : return format[0] == ARROW_LETTER_INT64 && format[1] == 0;
150 : }
151 :
152 1268570 : static inline bool IsUInt64(const char *format)
153 : {
154 1268570 : return format[0] == ARROW_LETTER_UINT64 && format[1] == 0;
155 : }
156 :
157 14922 : static inline bool IsFloat16(const char *format)
158 : {
159 14922 : return format[0] == ARROW_LETTER_FLOAT16 && format[1] == 0;
160 : }
161 :
162 1274980 : static inline bool IsFloat32(const char *format)
163 : {
164 1274980 : return format[0] == ARROW_LETTER_FLOAT32 && format[1] == 0;
165 : }
166 :
167 1266640 : static inline bool IsFloat64(const char *format)
168 : {
169 1266640 : return format[0] == ARROW_LETTER_FLOAT64 && format[1] == 0;
170 : }
171 :
172 2485360 : static inline bool IsString(const char *format)
173 : {
174 2485360 : return format[0] == ARROW_LETTER_STRING && format[1] == 0;
175 : }
176 :
177 74230 : static inline bool IsLargeString(const char *format)
178 : {
179 74230 : return format[0] == ARROW_LETTER_LARGE_STRING && format[1] == 0;
180 : }
181 :
182 79381 : static inline bool IsBinary(const char *format)
183 : {
184 79381 : return format[0] == ARROW_LETTER_BINARY && format[1] == 0;
185 : }
186 :
187 12856 : static inline bool IsLargeBinary(const char *format)
188 : {
189 12856 : return format[0] == ARROW_LETTER_LARGE_BINARY && format[1] == 0;
190 : }
191 :
/** Return whether format starts with "ts", followed by chType, followed by
 * ':'.
 *
 * @param format Format string to test.
 * @param chType Expected third character of the format string.
 */
static inline bool IsTimestampInternal(const char *format, char chType)
{
    if (format[0] != 't' || format[1] != 's')
        return false;
    return format[2] == chType && format[3] == ':';
}
197 :
198 3542 : static inline bool IsTimestampSeconds(const char *format)
199 : {
200 3542 : return IsTimestampInternal(format, 's');
201 : }
202 :
203 3532 : static inline bool IsTimestampMilliseconds(const char *format)
204 : {
205 3532 : return IsTimestampInternal(format, 'm');
206 : }
207 :
208 2387 : static inline bool IsTimestampMicroseconds(const char *format)
209 : {
210 2387 : return IsTimestampInternal(format, 'u');
211 : }
212 :
213 1835 : static inline bool IsTimestampNanoseconds(const char *format)
214 : {
215 1835 : return IsTimestampInternal(format, 'n');
216 : }
217 :
218 2784 : static inline bool IsTimestamp(const char *format)
219 : {
220 7260 : return IsTimestampSeconds(format) || IsTimestampMilliseconds(format) ||
221 7260 : IsTimestampMicroseconds(format) || IsTimestampNanoseconds(format);
222 : }
223 :
224 107 : static inline const char *GetTimestampTimezone(const char *format)
225 : {
226 107 : return IsTimestamp(format) ? format + strlen("tm?:") : "";
227 : }
228 :
229 : /************************************************************************/
230 : /* TestBit() */
231 : /************************************************************************/
232 :
/** Return whether bit nIdx of the bitmap pabyData is set.
 *
 * Bit nIdx is stored in byte nIdx / 8, at position nIdx % 8 counted from
 * the least significant bit.
 */
inline bool TestBit(const uint8_t *pabyData, size_t nIdx)
{
    const size_t nByte = nIdx / 8;
    const int nMask = 1 << (nIdx % 8);
    return (pabyData[nByte] & nMask) != 0;
}
237 :
238 : /************************************************************************/
239 : /* SetBit() */
240 : /************************************************************************/
241 :
/** Set bit nIdx of the bitmap pabyData.
 *
 * Bit nIdx is stored in byte nIdx / 8, at position nIdx % 8 counted from
 * the least significant bit. The other bits are left untouched.
 */
inline void SetBit(uint8_t *pabyData, size_t nIdx)
{
    const size_t nByte = nIdx / 8;
    const int nMask = 1 << (nIdx % 8);
    pabyData[nByte] = static_cast<uint8_t>(pabyData[nByte] | nMask);
}
246 :
247 : /************************************************************************/
248 : /* UnsetBit() */
249 : /************************************************************************/
250 :
/** Clear bit nIdx of the bitmap pabyData.
 *
 * Bit nIdx is stored in byte nIdx / 8, at position nIdx % 8 counted from
 * the least significant bit. The other bits are left untouched.
 */
inline void UnsetBit(uint8_t *pabyData, size_t nIdx)
{
    const size_t nByte = nIdx / 8;
    const uint8_t nKeepMask = static_cast<uint8_t>(~(1 << (nIdx % 8)));
    pabyData[nByte] = static_cast<uint8_t>(pabyData[nByte] & nKeepMask);
}
255 :
256 : /************************************************************************/
257 : /* DefaultReleaseSchema() */
258 : /************************************************************************/
259 :
260 25331 : static void OGRLayerReleaseSchema(struct ArrowSchema *schema,
261 : bool bFullFreeFormat)
262 : {
263 25331 : CPLAssert(schema->release != nullptr);
264 25331 : if (bFullFreeFormat || STARTS_WITH(schema->format, "w:") ||
265 25299 : STARTS_WITH(schema->format, "tsm:"))
266 : {
267 1033 : CPLFree(const_cast<char *>(schema->format));
268 : }
269 25331 : CPLFree(const_cast<char *>(schema->name));
270 25331 : CPLFree(const_cast<char *>(schema->metadata));
271 25331 : if (schema->children)
272 : {
273 25979 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
274 : {
275 22751 : if (schema->children[i] && schema->children[i]->release)
276 : {
277 22751 : schema->children[i]->release(schema->children[i]);
278 22751 : CPLFree(schema->children[i]);
279 : }
280 : }
281 3228 : CPLFree(schema->children);
282 : }
283 25331 : if (schema->dictionary)
284 : {
285 32 : if (schema->dictionary->release)
286 : {
287 32 : schema->dictionary->release(schema->dictionary);
288 32 : CPLFree(schema->dictionary);
289 : }
290 : }
291 25331 : schema->release = nullptr;
292 25331 : }
293 :
294 25308 : static void OGRLayerPartialReleaseSchema(struct ArrowSchema *schema)
295 : {
296 25308 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ false);
297 25308 : }
298 :
299 23 : static void OGRLayerFullReleaseSchema(struct ArrowSchema *schema)
300 : {
301 23 : OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ true);
302 23 : }
303 :
304 : /** Release a ArrowSchema.
305 : *
306 : * To be used by driver implementations that have a custom GetArrowStream()
307 : * implementation.
308 : *
309 : * @param schema Schema to release.
310 : * @since GDAL 3.6
311 : */
312 :
313 25276 : void OGRLayer::ReleaseSchema(struct ArrowSchema *schema)
314 : {
315 25276 : OGRLayerPartialReleaseSchema(schema);
316 25276 : }
317 :
318 : /************************************************************************/
319 : /* AddDictToSchema() */
320 : /************************************************************************/
321 :
322 32 : static void AddDictToSchema(struct ArrowSchema *psChild,
323 : const OGRCodedFieldDomain *poCodedDomain)
324 : {
325 32 : const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
326 32 : int nLastCode = -1;
327 32 : int nCountNull = 0;
328 32 : uint32_t nCountChars = 0;
329 112 : for (; psIter->pszCode; ++psIter)
330 : {
331 80 : if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
332 : {
333 0 : return;
334 : }
335 80 : int nCode = atoi(psIter->pszCode);
336 80 : if (nCode <= nLastCode || nCode - nLastCode > 100)
337 : {
338 0 : return;
339 : }
340 106 : for (int i = nLastCode + 1; i < nCode; ++i)
341 : {
342 26 : nCountNull++;
343 : }
344 80 : if (psIter->pszValue != nullptr)
345 : {
346 54 : const size_t nLen = strlen(psIter->pszValue);
347 54 : if (nLen > std::numeric_limits<uint32_t>::max() - nCountChars)
348 0 : return;
349 54 : nCountChars += static_cast<uint32_t>(nLen);
350 : }
351 : else
352 26 : nCountNull++;
353 80 : nLastCode = nCode;
354 : }
355 :
356 : auto psChildDict = static_cast<struct ArrowSchema *>(
357 32 : CPLCalloc(1, sizeof(struct ArrowSchema)));
358 32 : psChild->dictionary = psChildDict;
359 32 : psChildDict->release = OGRLayerPartialReleaseSchema;
360 32 : psChildDict->name = CPLStrdup(poCodedDomain->GetName().c_str());
361 32 : psChildDict->format = "u";
362 32 : if (nCountNull)
363 26 : psChildDict->flags = ARROW_FLAG_NULLABLE;
364 : }
365 :
366 : /************************************************************************/
367 : /* DefaultGetArrowSchema() */
368 : /************************************************************************/
369 :
370 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
371 : *
372 : * To be used by driver implementations that have a custom GetArrowStream()
373 : * implementation.
374 : *
375 : * @since GDAL 3.6
376 : */
377 2214 : int OGRLayer::GetArrowSchema(struct ArrowArrayStream *,
378 : struct ArrowSchema *out_schema)
379 : {
380 2214 : const bool bIncludeFID = CPLTestBool(
381 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
382 2214 : const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
383 : GAS_OPT_DATETIME_AS_STRING, false);
384 2214 : memset(out_schema, 0, sizeof(*out_schema));
385 2214 : out_schema->format = "+s";
386 2214 : out_schema->name = CPLStrdup("");
387 2214 : out_schema->metadata = nullptr;
388 2214 : auto poLayerDefn = GetLayerDefn();
389 2214 : const int nFieldCount = poLayerDefn->GetFieldCount();
390 2214 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
391 2214 : const int nChildren = 1 + nFieldCount + nGeomFieldCount;
392 :
393 2214 : out_schema->children = static_cast<struct ArrowSchema **>(
394 2214 : CPLCalloc(nChildren, sizeof(struct ArrowSchema *)));
395 2214 : int iSchemaChild = 0;
396 2214 : if (bIncludeFID)
397 : {
398 3924 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
399 1962 : CPLCalloc(1, sizeof(struct ArrowSchema)));
400 1962 : auto psChild = out_schema->children[iSchemaChild];
401 1962 : ++iSchemaChild;
402 1962 : psChild->release = OGRLayer::ReleaseSchema;
403 1962 : const char *pszFIDName = GetFIDColumn();
404 1962 : psChild->name =
405 1962 : CPLStrdup((pszFIDName && pszFIDName[0]) ? pszFIDName
406 : : DEFAULT_ARROW_FID_NAME);
407 1962 : psChild->format = "l";
408 : }
409 20104 : for (int i = 0; i < nFieldCount; ++i)
410 : {
411 17890 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
412 17890 : if (poFieldDefn->IsIgnored())
413 : {
414 40 : continue;
415 : }
416 :
417 35700 : out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
418 17850 : CPLCalloc(1, sizeof(struct ArrowSchema)));
419 17850 : auto psChild = out_schema->children[iSchemaChild];
420 17850 : ++iSchemaChild;
421 17850 : psChild->release = OGRLayer::ReleaseSchema;
422 17850 : psChild->name = CPLStrdup(poFieldDefn->GetNameRef());
423 17850 : if (poFieldDefn->IsNullable())
424 17064 : psChild->flags = ARROW_FLAG_NULLABLE;
425 17850 : const auto eType = poFieldDefn->GetType();
426 17850 : const auto eSubType = poFieldDefn->GetSubType();
427 17850 : const char *item_format = nullptr;
428 17850 : switch (eType)
429 : {
430 5727 : case OFTInteger:
431 : {
432 5727 : if (eSubType == OFSTBoolean)
433 286 : psChild->format = "b";
434 5441 : else if (eSubType == OFSTInt16)
435 673 : psChild->format = "s";
436 : else
437 4768 : psChild->format = "i";
438 :
439 5727 : const auto &osDomainName = poFieldDefn->GetDomainName();
440 5727 : if (!osDomainName.empty())
441 : {
442 32 : auto poDS = GetDataset();
443 32 : if (poDS)
444 : {
445 : const auto poFieldDomain =
446 32 : poDS->GetFieldDomain(osDomainName);
447 64 : if (poFieldDomain &&
448 32 : poFieldDomain->GetDomainType() == OFDT_CODED)
449 : {
450 32 : const OGRCodedFieldDomain *poCodedDomain =
451 : static_cast<const OGRCodedFieldDomain *>(
452 : poFieldDomain);
453 32 : AddDictToSchema(psChild, poCodedDomain);
454 : }
455 : }
456 : }
457 :
458 5727 : break;
459 : }
460 :
461 563 : case OFTInteger64:
462 563 : psChild->format = "l";
463 563 : break;
464 :
465 2880 : case OFTReal:
466 : {
467 2880 : if (eSubType == OFSTFloat32)
468 676 : psChild->format = "f";
469 : else
470 2204 : psChild->format = "g";
471 2880 : break;
472 : }
473 :
474 5116 : case OFTString:
475 : case OFTWideString:
476 5116 : psChild->format = "u";
477 5116 : break;
478 :
479 1209 : case OFTBinary:
480 : {
481 1209 : if (poFieldDefn->GetWidth() > 0)
482 9 : psChild->format =
483 9 : CPLStrdup(CPLSPrintf("w:%d", poFieldDefn->GetWidth()));
484 : else
485 1200 : psChild->format = "z";
486 1209 : break;
487 : }
488 :
489 383 : case OFTIntegerList:
490 : {
491 383 : if (eSubType == OFSTBoolean)
492 92 : item_format = "b";
493 291 : else if (eSubType == OFSTInt16)
494 67 : item_format = "s";
495 : else
496 224 : item_format = "i";
497 383 : break;
498 : }
499 :
500 97 : case OFTInteger64List:
501 97 : item_format = "l";
502 97 : break;
503 :
504 256 : case OFTRealList:
505 : {
506 256 : if (eSubType == OFSTFloat32)
507 84 : item_format = "f";
508 : else
509 172 : item_format = "g";
510 256 : break;
511 : }
512 :
513 273 : case OFTStringList:
514 : case OFTWideStringList:
515 273 : item_format = "u";
516 273 : break;
517 :
518 210 : case OFTDate:
519 210 : psChild->format = "tdD";
520 210 : break;
521 :
522 118 : case OFTTime:
523 118 : psChild->format = "ttm";
524 118 : break;
525 :
526 1018 : case OFTDateTime:
527 : {
528 1018 : const char *pszPrefix = "tsm:";
529 : const char *pszTZOverride =
530 1018 : m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
531 1018 : if (bDateTimeAsString)
532 : {
533 17 : psChild->format = "u";
534 : }
535 1001 : else if (pszTZOverride && EQUAL(pszTZOverride, "unknown"))
536 : {
537 2 : psChild->format = CPLStrdup(pszPrefix);
538 : }
539 999 : else if (pszTZOverride)
540 : {
541 40 : psChild->format = CPLStrdup(
542 80 : (std::string(pszPrefix) + pszTZOverride).c_str());
543 : }
544 : else
545 : {
546 959 : const int nTZFlag = poFieldDefn->GetTZFlag();
547 959 : if (nTZFlag == OGR_TZFLAG_MIXED_TZ ||
548 : nTZFlag == OGR_TZFLAG_UTC)
549 : {
550 7 : psChild->format =
551 7 : CPLStrdup(CPLSPrintf("%sUTC", pszPrefix));
552 : }
553 952 : else if (nTZFlag == OGR_TZFLAG_UNKNOWN ||
554 : nTZFlag == OGR_TZFLAG_LOCALTIME)
555 : {
556 936 : psChild->format = CPLStrdup(pszPrefix);
557 : }
558 : else
559 : {
560 16 : psChild->format = CPLStrdup(
561 32 : (pszPrefix + OGRTZFlagToTimezone(nTZFlag, "UTC"))
562 : .c_str());
563 : }
564 : }
565 1018 : break;
566 : }
567 : }
568 :
569 17850 : if (item_format)
570 : {
571 1009 : psChild->format = "+l";
572 1009 : psChild->n_children = 1;
573 1009 : psChild->children = static_cast<struct ArrowSchema **>(
574 1009 : CPLCalloc(1, sizeof(struct ArrowSchema *)));
575 2018 : psChild->children[0] = static_cast<struct ArrowSchema *>(
576 1009 : CPLCalloc(1, sizeof(struct ArrowSchema)));
577 1009 : psChild->children[0]->release = OGRLayer::ReleaseSchema;
578 1009 : psChild->children[0]->name = CPLStrdup("item");
579 1009 : psChild->children[0]->format = item_format;
580 : }
581 :
582 35700 : std::vector<std::pair<std::string, std::string>> oMetadata;
583 :
584 17850 : if (eType == OFTDateTime && bDateTimeAsString)
585 : {
586 : oMetadata.emplace_back(
587 17 : std::pair(MD_GDAL_OGR_TYPE, OGR_GetFieldTypeName(eType)));
588 : }
589 :
590 17850 : const char *pszAlternativeName = poFieldDefn->GetAlternativeNameRef();
591 17850 : if (pszAlternativeName && pszAlternativeName[0])
592 : oMetadata.emplace_back(
593 262 : std::pair(MD_GDAL_OGR_ALTERNATIVE_NAME, pszAlternativeName));
594 :
595 17850 : const char *pszDefault = poFieldDefn->GetDefault();
596 17850 : if (pszDefault && pszDefault[0])
597 42 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DEFAULT, pszDefault));
598 :
599 17850 : const std::string &osComment = poFieldDefn->GetComment();
600 17850 : if (!osComment.empty())
601 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_COMMENT, osComment));
602 :
603 17850 : if (eType == OFTString && eSubType == OFSTJSON)
604 : {
605 130 : oMetadata.emplace_back(
606 130 : std::pair(ARROW_EXTENSION_NAME_KEY, EXTENSION_NAME_ARROW_JSON));
607 : }
608 17720 : else if (eSubType != OFSTNone && eSubType != OFSTBoolean &&
609 : eSubType != OFSTFloat32)
610 : {
611 0 : oMetadata.emplace_back(std::pair(
612 741 : MD_GDAL_OGR_SUBTYPE, OGR_GetFieldSubTypeName(eSubType)));
613 : }
614 17850 : if (eType == OFTString && poFieldDefn->GetWidth() > 0)
615 : {
616 0 : oMetadata.emplace_back(std::pair(
617 639 : MD_GDAL_OGR_WIDTH, CPLSPrintf("%d", poFieldDefn->GetWidth())));
618 : }
619 17850 : if (poFieldDefn->IsUnique())
620 : {
621 10 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_UNIQUE, "true"));
622 : }
623 17850 : if (!poFieldDefn->GetDomainName().empty())
624 : {
625 64 : oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DOMAIN_NAME,
626 64 : poFieldDefn->GetDomainName()));
627 : }
628 :
629 17850 : if (!oMetadata.empty())
630 : {
631 1853 : uint64_t nLen64 = sizeof(int32_t);
632 3736 : for (const auto &oPair : oMetadata)
633 : {
634 1883 : nLen64 += sizeof(int32_t);
635 1883 : nLen64 += oPair.first.size();
636 1883 : nLen64 += sizeof(int32_t);
637 1883 : nLen64 += oPair.second.size();
638 : }
639 1853 : if (nLen64 <
640 1853 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
641 : {
642 1853 : const size_t nLen = static_cast<size_t>(nLen64);
643 1853 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
644 1853 : psChild->metadata = pszMetadata;
645 1853 : size_t offsetMD = 0;
646 1853 : int32_t nSize = static_cast<int>(oMetadata.size());
647 1853 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
648 1853 : offsetMD += sizeof(int32_t);
649 3736 : for (const auto &oPair : oMetadata)
650 : {
651 1883 : nSize = static_cast<int32_t>(oPair.first.size());
652 1883 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
653 1883 : offsetMD += sizeof(int32_t);
654 1883 : memcpy(pszMetadata + offsetMD, oPair.first.data(),
655 : oPair.first.size());
656 1883 : offsetMD += oPair.first.size();
657 :
658 1883 : nSize = static_cast<int32_t>(oPair.second.size());
659 1883 : memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
660 1883 : offsetMD += sizeof(int32_t);
661 1883 : memcpy(pszMetadata + offsetMD, oPair.second.data(),
662 : oPair.second.size());
663 1883 : offsetMD += oPair.second.size();
664 : }
665 :
666 1853 : CPLAssert(offsetMD == nLen);
667 1853 : CPL_IGNORE_RET_VAL(offsetMD);
668 : }
669 : else
670 : {
671 : // Extremely unlikely !
672 0 : CPLError(CE_Warning, CPLE_AppDefined,
673 : "Cannot write ArrowSchema::metadata due to "
674 : "too large content");
675 : }
676 : }
677 : }
678 :
679 : const char *const pszGeometryMetadataEncoding =
680 2214 : m_aosArrowArrayStreamOptions.FetchNameValue(
681 : "GEOMETRY_METADATA_ENCODING");
682 2214 : const char *pszExtensionName = EXTENSION_NAME_OGC_WKB;
683 2214 : if (pszGeometryMetadataEncoding)
684 : {
685 6 : if (EQUAL(pszGeometryMetadataEncoding, "OGC"))
686 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
687 6 : else if (EQUAL(pszGeometryMetadataEncoding, "GEOARROW"))
688 6 : pszExtensionName = EXTENSION_NAME_GEOARROW_WKB;
689 : else
690 0 : CPLError(CE_Warning, CPLE_NotSupported,
691 : "Unsupported GEOMETRY_METADATA_ENCODING value: %s",
692 : pszGeometryMetadataEncoding);
693 : }
694 4141 : for (int i = 0; i < nGeomFieldCount; ++i)
695 : {
696 1927 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
697 1927 : if (poFieldDefn->IsIgnored())
698 : {
699 15 : continue;
700 : }
701 :
702 1912 : out_schema->children[iSchemaChild] = CreateSchemaForWKBGeometryColumn(
703 : poFieldDefn, "z", pszExtensionName);
704 :
705 1912 : ++iSchemaChild;
706 : }
707 :
708 2214 : out_schema->n_children = iSchemaChild;
709 2214 : out_schema->release = OGRLayer::ReleaseSchema;
710 2214 : return 0;
711 : }
712 :
713 : /************************************************************************/
714 : /* CreateSchemaForWKBGeometryColumn() */
715 : /************************************************************************/
716 :
717 : /** Return a ArrowSchema* corresponding to the WKB encoding of a geometry
718 : * column.
719 : */
720 :
721 : /* static */
722 : struct ArrowSchema *
723 2241 : OGRLayer::CreateSchemaForWKBGeometryColumn(const OGRGeomFieldDefn *poFieldDefn,
724 : const char *pszArrowFormat,
725 : const char *pszExtensionName)
726 : {
727 2241 : CPLAssert(strcmp(pszArrowFormat, "z") == 0 ||
728 : strcmp(pszArrowFormat, "Z") == 0);
729 2241 : if (!EQUAL(pszExtensionName, EXTENSION_NAME_OGC_WKB) &&
730 6 : !EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
731 : {
732 0 : CPLError(CE_Failure, CPLE_NotSupported,
733 : "Unsupported extension name '%s'. Defaulting to '%s'",
734 : pszExtensionName, EXTENSION_NAME_OGC_WKB);
735 0 : pszExtensionName = EXTENSION_NAME_OGC_WKB;
736 : }
737 : auto psSchema = static_cast<struct ArrowSchema *>(
738 2241 : CPLCalloc(1, sizeof(struct ArrowSchema)));
739 2241 : psSchema->release = OGRLayer::ReleaseSchema;
740 2241 : const char *pszGeomFieldName = poFieldDefn->GetNameRef();
741 2241 : if (pszGeomFieldName[0] == '\0')
742 773 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
743 2241 : psSchema->name = CPLStrdup(pszGeomFieldName);
744 2241 : if (poFieldDefn->IsNullable())
745 2212 : psSchema->flags = ARROW_FLAG_NULLABLE;
746 2241 : psSchema->format = strcmp(pszArrowFormat, "z") == 0 ? "z" : "Z";
747 2241 : std::string osExtensionMetadata;
748 2241 : if (EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
749 : {
750 6 : const auto poSRS = poFieldDefn->GetSpatialRef();
751 6 : if (poSRS)
752 : {
753 3 : char *pszPROJJSON = nullptr;
754 3 : poSRS->exportToPROJJSON(&pszPROJJSON, nullptr);
755 3 : if (pszPROJJSON)
756 : {
757 3 : osExtensionMetadata = "{\"crs\":";
758 3 : osExtensionMetadata += pszPROJJSON;
759 3 : osExtensionMetadata += '}';
760 3 : CPLFree(pszPROJJSON);
761 : }
762 : else
763 : {
764 0 : CPLError(CE_Warning, CPLE_AppDefined,
765 : "Cannot export CRS of geometry field %s to PROJJSON",
766 : poFieldDefn->GetNameRef());
767 : }
768 : }
769 : }
770 2241 : size_t nLen = sizeof(int32_t) + sizeof(int32_t) +
771 : strlen(ARROW_EXTENSION_NAME_KEY) + sizeof(int32_t) +
772 2241 : strlen(pszExtensionName);
773 2241 : if (!osExtensionMetadata.empty())
774 : {
775 3 : nLen += sizeof(int32_t) + strlen(ARROW_EXTENSION_METADATA_KEY) +
776 3 : sizeof(int32_t) + osExtensionMetadata.size();
777 : }
778 2241 : char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
779 2241 : psSchema->metadata = pszMetadata;
780 2241 : size_t offsetMD = 0;
781 2241 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
782 2241 : osExtensionMetadata.empty() ? 1 : 2;
783 2241 : offsetMD += sizeof(int32_t);
784 2241 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
785 : static_cast<int32_t>(strlen(ARROW_EXTENSION_NAME_KEY));
786 2241 : offsetMD += sizeof(int32_t);
787 2241 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_NAME_KEY,
788 : strlen(ARROW_EXTENSION_NAME_KEY));
789 2241 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_NAME_KEY));
790 2241 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
791 2241 : static_cast<int32_t>(strlen(pszExtensionName));
792 2241 : offsetMD += sizeof(int32_t);
793 2241 : memcpy(pszMetadata + offsetMD, pszExtensionName, strlen(pszExtensionName));
794 2241 : offsetMD += strlen(pszExtensionName);
795 2241 : if (!osExtensionMetadata.empty())
796 : {
797 3 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
798 : static_cast<int32_t>(strlen(ARROW_EXTENSION_METADATA_KEY));
799 3 : offsetMD += sizeof(int32_t);
800 3 : memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_METADATA_KEY,
801 : strlen(ARROW_EXTENSION_METADATA_KEY));
802 3 : offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_METADATA_KEY));
803 3 : *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
804 3 : static_cast<int32_t>(osExtensionMetadata.size());
805 3 : offsetMD += sizeof(int32_t);
806 3 : memcpy(pszMetadata + offsetMD, osExtensionMetadata.c_str(),
807 : osExtensionMetadata.size());
808 3 : offsetMD += osExtensionMetadata.size();
809 : }
810 2241 : CPLAssert(offsetMD == nLen);
811 2241 : CPL_IGNORE_RET_VAL(offsetMD);
812 4482 : return psSchema;
813 : }
814 :
815 : /************************************************************************/
816 : /* StaticGetArrowSchema() */
817 : /************************************************************************/
818 :
819 : /** Default implementation of the ArrowArrayStream::get_schema() callback.
820 : *
821 : * To be used by driver implementations that have a custom GetArrowStream()
822 : * implementation.
823 : *
824 : * @since GDAL 3.6
825 : */
826 2434 : int OGRLayer::StaticGetArrowSchema(struct ArrowArrayStream *stream,
827 : struct ArrowSchema *out_schema)
828 : {
829 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
830 2434 : stream->private_data)
831 2434 : ->poShared->m_poLayer;
832 2434 : if (poLayer == nullptr)
833 : {
834 1 : CPLError(CE_Failure, CPLE_NotSupported,
835 : "Calling get_schema() on a freed OGRLayer is not supported");
836 1 : return EINVAL;
837 : }
838 2433 : return poLayer->GetArrowSchema(stream, out_schema);
839 : }
840 :
841 : /************************************************************************/
842 : /* DefaultReleaseArray() */
843 : /************************************************************************/
844 :
845 34759 : static void OGRLayerDefaultReleaseArray(struct ArrowArray *array)
846 : {
847 34759 : if (array->buffers)
848 : {
849 109245 : for (int i = 0; i < static_cast<int>(array->n_buffers); ++i)
850 74486 : VSIFreeAligned(const_cast<void *>(array->buffers[i]));
851 34759 : CPLFree(array->buffers);
852 : }
853 34759 : if (array->children)
854 : {
855 40699 : for (int i = 0; i < static_cast<int>(array->n_children); ++i)
856 : {
857 32969 : if (array->children[i] && array->children[i]->release)
858 : {
859 32596 : array->children[i]->release(array->children[i]);
860 32596 : CPLFree(array->children[i]);
861 : }
862 : }
863 7730 : CPLFree(array->children);
864 : }
865 34759 : if (array->dictionary)
866 : {
867 148 : if (array->dictionary->release)
868 : {
869 148 : array->dictionary->release(array->dictionary);
870 148 : CPLFree(array->dictionary);
871 : }
872 : }
873 34759 : array->release = nullptr;
874 34759 : }
875 :
876 : /** Release a ArrowArray.
877 : *
878 : * To be used by driver implementations that have a custom GetArrowStream()
879 : * implementation.
880 : *
881 : * @param array Arrow array to release.
882 : * @since GDAL 3.6
883 : */
884 3935 : void OGRLayer::ReleaseArray(struct ArrowArray *array)
885 : {
886 3935 : OGRLayerDefaultReleaseArray(array);
887 3935 : }
888 :
889 : /************************************************************************/
890 : /* IsValidField() */
891 : /************************************************************************/
892 :
893 88473 : static inline bool IsValidField(const OGRField *psRawField)
894 : {
895 103605 : return (!(psRawField->Set.nMarker1 == OGRUnsetMarker &&
896 7566 : psRawField->Set.nMarker2 == OGRUnsetMarker &&
897 176946 : psRawField->Set.nMarker3 == OGRUnsetMarker) &&
898 80907 : !(psRawField->Set.nMarker1 == OGRNullMarker &&
899 3210 : psRawField->Set.nMarker2 == OGRNullMarker &&
900 91683 : psRawField->Set.nMarker3 == OGRNullMarker));
901 : }
902 :
903 : /************************************************************************/
904 : /* AllocValidityBitmap() */
905 : /************************************************************************/
906 :
907 3503 : static uint8_t *AllocValidityBitmap(size_t nSize)
908 : {
909 : auto pabyValidity = static_cast<uint8_t *>(
910 3503 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((1 + nSize + 7) / 8));
911 3503 : if (pabyValidity)
912 : {
913 : // All valid initially
914 3503 : memset(pabyValidity, 0xFF, (nSize + 7) / 8);
915 : }
916 3503 : return pabyValidity;
917 : }
918 :
919 : /************************************************************************/
920 : /* FillArray() */
921 : /************************************************************************/
922 :
923 : template <class T, typename TMember>
924 5802 : static bool FillArray(struct ArrowArray *psChild,
925 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
926 : const size_t nFeatureCountLimit, const bool bIsNullable,
927 : TMember member, const int i)
928 : {
929 5802 : psChild->n_buffers = 2;
930 5802 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
931 5802 : uint8_t *pabyValidity = nullptr;
932 : T *panValues = static_cast<T *>(
933 5802 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
934 5802 : if (panValues == nullptr)
935 0 : return false;
936 5802 : psChild->buffers[1] = panValues;
937 53690 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
938 : {
939 47888 : auto &poFeature = apoFeatures[iFeat];
940 47888 : const auto psRawField = poFeature->GetRawFieldRef(i);
941 47888 : if (IsValidField(psRawField))
942 : {
943 42973 : panValues[iFeat] = static_cast<T>((*psRawField).*member);
944 : }
945 4915 : else if (bIsNullable)
946 : {
947 4915 : panValues[iFeat] = 0;
948 4915 : ++psChild->null_count;
949 4915 : if (pabyValidity == nullptr)
950 : {
951 1232 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
952 1232 : psChild->buffers[0] = pabyValidity;
953 1232 : if (pabyValidity == nullptr)
954 0 : return false;
955 : }
956 4915 : UnsetBit(pabyValidity, iFeat);
957 : }
958 : else
959 : {
960 0 : panValues[iFeat] = 0;
961 : }
962 : }
963 5802 : return true;
964 : }
965 :
966 : /************************************************************************/
967 : /* FillBoolArray() */
968 : /************************************************************************/
969 :
970 : template <typename TMember>
971 138 : static bool FillBoolArray(struct ArrowArray *psChild,
972 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
973 : const size_t nFeatureCountLimit,
974 : const bool bIsNullable, TMember member, const int i)
975 : {
976 138 : psChild->n_buffers = 2;
977 138 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
978 138 : uint8_t *pabyValidity = nullptr;
979 : uint8_t *panValues = static_cast<uint8_t *>(
980 138 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 7 + 1) / 8));
981 138 : if (panValues == nullptr)
982 0 : return false;
983 138 : memset(panValues, 0, (nFeatureCountLimit + 7) / 8);
984 138 : psChild->buffers[1] = panValues;
985 601 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
986 : {
987 463 : auto &poFeature = apoFeatures[iFeat];
988 463 : const auto psRawField = poFeature->GetRawFieldRef(i);
989 463 : if (IsValidField(psRawField))
990 : {
991 405 : if ((*psRawField).*member)
992 81 : SetBit(panValues, iFeat);
993 : }
994 58 : else if (bIsNullable)
995 : {
996 58 : ++psChild->null_count;
997 58 : if (pabyValidity == nullptr)
998 : {
999 46 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1000 46 : psChild->buffers[0] = pabyValidity;
1001 46 : if (pabyValidity == nullptr)
1002 0 : return false;
1003 : }
1004 58 : UnsetBit(pabyValidity, iFeat);
1005 : }
1006 : }
1007 138 : return true;
1008 : }
1009 :
1010 : /************************************************************************/
1011 : /* FillListArray() */
1012 : /************************************************************************/
1013 :
1014 : struct GetFromIntegerList
1015 : {
1016 555 : static inline int getCount(const OGRField *psRawField)
1017 : {
1018 555 : return psRawField->IntegerList.nCount;
1019 : }
1020 :
1021 276 : static inline const int *getValues(const OGRField *psRawField)
1022 : {
1023 276 : return psRawField->IntegerList.paList;
1024 : }
1025 : };
1026 :
1027 : struct GetFromInteger64List
1028 : {
1029 242 : static inline int getCount(const OGRField *psRawField)
1030 : {
1031 242 : return psRawField->Integer64List.nCount;
1032 : }
1033 :
1034 120 : static inline const GIntBig *getValues(const OGRField *psRawField)
1035 : {
1036 120 : return psRawField->Integer64List.paList;
1037 : }
1038 : };
1039 :
1040 : struct GetFromRealList
1041 : {
1042 374 : static inline int getCount(const OGRField *psRawField)
1043 : {
1044 374 : return psRawField->RealList.nCount;
1045 : }
1046 :
1047 186 : static inline const double *getValues(const OGRField *psRawField)
1048 : {
1049 186 : return psRawField->RealList.paList;
1050 : }
1051 : };
1052 :
1053 : template <class OffsetType, class T, class GetFromList>
1054 : static size_t
1055 416 : FillListArray(struct ArrowArray *psChild,
1056 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1057 : const size_t nFeatureCountLimit, const bool bIsNullable,
1058 : const int i, const size_t nMemLimit)
1059 : {
1060 416 : psChild->n_buffers = 2;
1061 416 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1062 416 : uint8_t *pabyValidity = nullptr;
1063 : OffsetType *panOffsets =
1064 416 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1065 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1066 416 : if (panOffsets == nullptr)
1067 0 : return 0;
1068 416 : psChild->buffers[1] = panOffsets;
1069 :
1070 416 : OffsetType nOffset = 0;
1071 416 : size_t nFeatCount = 0;
1072 1445 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1073 : {
1074 1035 : panOffsets[iFeat] = nOffset;
1075 1035 : auto &poFeature = apoFeatures[iFeat];
1076 1035 : const auto psRawField = poFeature->GetRawFieldRef(i);
1077 1035 : if (IsValidField(psRawField))
1078 : {
1079 529 : const unsigned nCount = GetFromList::getCount(psRawField);
1080 529 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1081 : {
1082 6 : if (nFeatCount == 0)
1083 3 : return 0;
1084 3 : break;
1085 : }
1086 523 : nOffset += static_cast<OffsetType>(nCount);
1087 : }
1088 506 : else if (bIsNullable)
1089 : {
1090 506 : ++psChild->null_count;
1091 506 : if (pabyValidity == nullptr)
1092 : {
1093 231 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1094 231 : psChild->buffers[0] = pabyValidity;
1095 231 : if (pabyValidity == nullptr)
1096 0 : return 0;
1097 : }
1098 506 : UnsetBit(pabyValidity, iFeat);
1099 : }
1100 : }
1101 413 : panOffsets[nFeatCount] = nOffset;
1102 :
1103 413 : psChild->n_children = 1;
1104 413 : psChild->children = static_cast<struct ArrowArray **>(
1105 413 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1106 826 : psChild->children[0] = static_cast<struct ArrowArray *>(
1107 413 : CPLCalloc(1, sizeof(struct ArrowArray)));
1108 413 : auto psValueChild = psChild->children[0];
1109 :
1110 413 : psValueChild->release = OGRLayerDefaultReleaseArray;
1111 413 : psValueChild->n_buffers = 2;
1112 413 : psValueChild->buffers =
1113 413 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1114 413 : psValueChild->length = nOffset;
1115 : T *panValues = static_cast<T *>(
1116 413 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (nOffset + 1)));
1117 413 : if (panValues == nullptr)
1118 0 : return 0;
1119 413 : psValueChild->buffers[1] = panValues;
1120 :
1121 413 : nOffset = 0;
1122 1442 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1123 : {
1124 1029 : auto &poFeature = apoFeatures[iFeat];
1125 1029 : const auto psRawField = poFeature->GetRawFieldRef(i);
1126 1029 : if (IsValidField(psRawField))
1127 : {
1128 523 : const int nCount = GetFromList::getCount(psRawField);
1129 523 : const auto paList = GetFromList::getValues(psRawField);
1130 : if (sizeof(*paList) == sizeof(T))
1131 456 : memcpy(panValues + nOffset, paList, nCount * sizeof(T));
1132 : else
1133 : {
1134 203 : for (int j = 0; j < nCount; ++j)
1135 : {
1136 136 : panValues[nOffset + j] = static_cast<T>(paList[j]);
1137 : }
1138 : }
1139 523 : nOffset += static_cast<OffsetType>(nCount);
1140 : }
1141 : }
1142 :
1143 413 : return nFeatCount;
1144 : }
1145 :
1146 : template <class OffsetType, class GetFromList>
1147 : static size_t
1148 49 : FillListArrayBool(struct ArrowArray *psChild,
1149 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1150 : const size_t nFeatureCountLimit, const bool bIsNullable,
1151 : const int i, const size_t nMemLimit)
1152 : {
1153 49 : psChild->n_buffers = 2;
1154 49 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1155 49 : uint8_t *pabyValidity = nullptr;
1156 : OffsetType *panOffsets =
1157 49 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1158 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1159 49 : if (panOffsets == nullptr)
1160 0 : return 0;
1161 49 : psChild->buffers[1] = panOffsets;
1162 :
1163 49 : OffsetType nOffset = 0;
1164 49 : size_t nFeatCount = 0;
1165 138 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1166 : {
1167 91 : panOffsets[iFeat] = nOffset;
1168 91 : auto &poFeature = apoFeatures[iFeat];
1169 91 : const auto psRawField = poFeature->GetRawFieldRef(i);
1170 91 : if (IsValidField(psRawField))
1171 : {
1172 60 : const unsigned nCount = GetFromList::getCount(psRawField);
1173 60 : if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1174 : {
1175 2 : if (nFeatCount == 0)
1176 1 : return 0;
1177 1 : break;
1178 : }
1179 58 : nOffset += static_cast<OffsetType>(nCount);
1180 : }
1181 31 : else if (bIsNullable)
1182 : {
1183 31 : ++psChild->null_count;
1184 31 : if (pabyValidity == nullptr)
1185 : {
1186 27 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1187 27 : psChild->buffers[0] = pabyValidity;
1188 27 : if (pabyValidity == nullptr)
1189 0 : return 0;
1190 : }
1191 31 : UnsetBit(pabyValidity, iFeat);
1192 : }
1193 : }
1194 48 : panOffsets[nFeatCount] = nOffset;
1195 :
1196 48 : psChild->n_children = 1;
1197 48 : psChild->children = static_cast<struct ArrowArray **>(
1198 48 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1199 96 : psChild->children[0] = static_cast<struct ArrowArray *>(
1200 48 : CPLCalloc(1, sizeof(struct ArrowArray)));
1201 48 : auto psValueChild = psChild->children[0];
1202 :
1203 48 : psValueChild->release = OGRLayerDefaultReleaseArray;
1204 48 : psValueChild->n_buffers = 2;
1205 48 : psValueChild->buffers =
1206 48 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1207 48 : psValueChild->length = nOffset;
1208 : uint8_t *panValues = static_cast<uint8_t *>(
1209 48 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nOffset + 7 + 1) / 8));
1210 48 : if (panValues == nullptr)
1211 0 : return 0;
1212 48 : memset(panValues, 0, (nOffset + 7) / 8);
1213 48 : psValueChild->buffers[1] = panValues;
1214 :
1215 48 : nOffset = 0;
1216 138 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1217 : {
1218 90 : auto &poFeature = apoFeatures[iFeat];
1219 90 : const auto psRawField = poFeature->GetRawFieldRef(i);
1220 90 : if (IsValidField(psRawField))
1221 : {
1222 59 : const int nCount = GetFromList::getCount(psRawField);
1223 59 : const auto paList = GetFromList::getValues(psRawField);
1224 :
1225 373 : for (int j = 0; j < nCount; ++j)
1226 : {
1227 314 : if (paList[j])
1228 55 : SetBit(panValues, nOffset + j);
1229 : }
1230 59 : nOffset += static_cast<OffsetType>(nCount);
1231 : }
1232 : }
1233 :
1234 48 : return nFeatCount;
1235 : }
1236 :
1237 : /************************************************************************/
1238 : /* FillStringArray() */
1239 : /************************************************************************/
1240 :
1241 : template <class T>
1242 : static size_t
1243 3762 : FillStringArray(struct ArrowArray *psChild,
1244 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1245 : const size_t nFeatureCountLimit, const bool bIsNullable,
1246 : const int i, const size_t nMemLimit)
1247 : {
1248 3762 : psChild->n_buffers = 3;
1249 3762 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1250 3762 : uint8_t *pabyValidity = nullptr;
1251 : T *panOffsets = static_cast<T *>(
1252 3762 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1253 3762 : if (panOffsets == nullptr)
1254 0 : return 0;
1255 3762 : psChild->buffers[1] = panOffsets;
1256 :
1257 3762 : size_t nOffset = 0;
1258 3762 : size_t nFeatCount = 0;
1259 33957 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1260 : {
1261 30215 : panOffsets[iFeat] = static_cast<T>(nOffset);
1262 30215 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1263 30215 : if (IsValidField(psRawField))
1264 : {
1265 26875 : const size_t nLen = strlen(psRawField->String);
1266 26875 : if (nLen > nMemLimit - nOffset)
1267 : {
1268 20 : if (nFeatCount == 0)
1269 19 : return 0;
1270 1 : break;
1271 : }
1272 26855 : nOffset += static_cast<T>(nLen);
1273 : }
1274 3340 : else if (bIsNullable)
1275 : {
1276 3340 : ++psChild->null_count;
1277 3340 : if (pabyValidity == nullptr)
1278 : {
1279 1131 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1280 1131 : psChild->buffers[0] = pabyValidity;
1281 1131 : if (pabyValidity == nullptr)
1282 0 : return 0;
1283 : }
1284 3340 : UnsetBit(pabyValidity, iFeat);
1285 : }
1286 : }
1287 3743 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1288 :
1289 : char *pachValues =
1290 3743 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1291 3743 : if (pachValues == nullptr)
1292 0 : return 0;
1293 3743 : psChild->buffers[2] = pachValues;
1294 :
1295 3743 : nOffset = 0;
1296 33938 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1297 : {
1298 30195 : const size_t nLen =
1299 30195 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1300 30195 : if (nLen)
1301 : {
1302 25303 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1303 25303 : memcpy(pachValues + nOffset, psRawField->String, nLen);
1304 25303 : nOffset += nLen;
1305 : }
1306 : }
1307 :
1308 3743 : return nFeatCount;
1309 : }
1310 :
1311 : /************************************************************************/
1312 : /* FillStringListArray() */
1313 : /************************************************************************/
1314 :
1315 : template <class OffsetType>
1316 : static size_t
1317 203 : FillStringListArray(struct ArrowArray *psChild,
1318 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1319 : const size_t nFeatureCountLimit, const bool bIsNullable,
1320 : const int i, const size_t nMemLimit)
1321 : {
1322 203 : psChild->n_buffers = 2;
1323 203 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1324 203 : uint8_t *pabyValidity = nullptr;
1325 : OffsetType *panOffsets =
1326 203 : static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1327 : sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1328 203 : if (panOffsets == nullptr)
1329 0 : return false;
1330 203 : psChild->buffers[1] = panOffsets;
1331 :
1332 203 : OffsetType nStrings = 0;
1333 203 : OffsetType nCountChars = 0;
1334 203 : size_t nFeatCount = 0;
1335 516 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1336 : {
1337 315 : panOffsets[iFeat] = nStrings;
1338 315 : auto &poFeature = apoFeatures[iFeat];
1339 315 : const auto psRawField = poFeature->GetRawFieldRef(i);
1340 315 : if (IsValidField(psRawField))
1341 : {
1342 108 : const int nCount = psRawField->StringList.nCount;
1343 108 : if (static_cast<size_t>(nCount) >
1344 108 : static_cast<size_t>(nMemLimit - nStrings))
1345 : {
1346 0 : if (nFeatCount == 0)
1347 0 : return 0;
1348 0 : goto after_loop;
1349 : }
1350 280 : for (int j = 0; j < nCount; ++j)
1351 : {
1352 174 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1353 174 : if (nLen > static_cast<size_t>(nMemLimit - nCountChars))
1354 : {
1355 2 : if (nFeatCount == 0)
1356 1 : return 0;
1357 1 : goto after_loop;
1358 : }
1359 172 : nCountChars += static_cast<OffsetType>(nLen);
1360 : }
1361 106 : nStrings += static_cast<OffsetType>(nCount);
1362 : }
1363 207 : else if (bIsNullable)
1364 : {
1365 207 : ++psChild->null_count;
1366 207 : if (pabyValidity == nullptr)
1367 : {
1368 152 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1369 152 : psChild->buffers[0] = pabyValidity;
1370 152 : if (pabyValidity == nullptr)
1371 0 : return 0;
1372 : }
1373 207 : UnsetBit(pabyValidity, iFeat);
1374 : }
1375 : }
1376 201 : after_loop:
1377 202 : panOffsets[nFeatCount] = nStrings;
1378 :
1379 202 : psChild->n_children = 1;
1380 202 : psChild->children = static_cast<struct ArrowArray **>(
1381 202 : CPLCalloc(1, sizeof(struct ArrowArray *)));
1382 404 : psChild->children[0] = static_cast<struct ArrowArray *>(
1383 202 : CPLCalloc(1, sizeof(struct ArrowArray)));
1384 202 : auto psValueChild = psChild->children[0];
1385 :
1386 202 : psValueChild->release = OGRLayerDefaultReleaseArray;
1387 202 : psValueChild->length = nStrings;
1388 202 : psValueChild->n_buffers = 3;
1389 202 : psValueChild->buffers =
1390 202 : static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1391 :
1392 : OffsetType *panChildOffsets = static_cast<OffsetType *>(
1393 202 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(OffsetType) * (1 + nStrings)));
1394 202 : if (panChildOffsets == nullptr)
1395 0 : return 0;
1396 202 : psValueChild->buffers[1] = panChildOffsets;
1397 :
1398 : char *pachValues =
1399 202 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars + 1));
1400 202 : if (pachValues == nullptr)
1401 0 : return 0;
1402 202 : psValueChild->buffers[2] = pachValues;
1403 :
1404 202 : nStrings = 0;
1405 202 : nCountChars = 0;
1406 515 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1407 : {
1408 313 : auto &poFeature = apoFeatures[iFeat];
1409 313 : const auto psRawField = poFeature->GetRawFieldRef(i);
1410 313 : if (IsValidField(psRawField))
1411 : {
1412 106 : const int nCount = psRawField->StringList.nCount;
1413 278 : for (int j = 0; j < nCount; ++j)
1414 : {
1415 172 : panChildOffsets[nStrings] = nCountChars;
1416 172 : ++nStrings;
1417 172 : const size_t nLen = strlen(psRawField->StringList.paList[j]);
1418 172 : memcpy(pachValues + nCountChars,
1419 172 : psRawField->StringList.paList[j], nLen);
1420 172 : nCountChars += static_cast<OffsetType>(nLen);
1421 : }
1422 : }
1423 : }
1424 202 : panChildOffsets[nStrings] = nCountChars;
1425 :
1426 202 : return nFeatCount;
1427 : }
1428 :
1429 : /************************************************************************/
1430 : /* FillBinaryArray() */
1431 : /************************************************************************/
1432 :
1433 : template <class T>
1434 : static size_t
1435 905 : FillBinaryArray(struct ArrowArray *psChild,
1436 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1437 : const size_t nFeatureCountLimit, const bool bIsNullable,
1438 : const int i, const size_t nMemLimit)
1439 : {
1440 905 : psChild->n_buffers = 3;
1441 905 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1442 905 : uint8_t *pabyValidity = nullptr;
1443 : T *panOffsets = static_cast<T *>(
1444 905 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1445 905 : if (panOffsets == nullptr)
1446 0 : return 0;
1447 905 : psChild->buffers[1] = panOffsets;
1448 :
1449 905 : T nOffset = 0;
1450 905 : size_t nFeatCount = 0;
1451 4362 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1452 : {
1453 3459 : panOffsets[iFeat] = nOffset;
1454 3459 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1455 3459 : if (IsValidField(psRawField))
1456 : {
1457 3402 : const size_t nLen = psRawField->Binary.nCount;
1458 3402 : if (nLen > static_cast<size_t>(nMemLimit - nOffset))
1459 : {
1460 2 : if (iFeat == 0)
1461 1 : return 0;
1462 1 : break;
1463 : }
1464 3400 : nOffset += static_cast<T>(nLen);
1465 : }
1466 57 : else if (bIsNullable)
1467 : {
1468 57 : ++psChild->null_count;
1469 57 : if (pabyValidity == nullptr)
1470 : {
1471 49 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1472 49 : psChild->buffers[0] = pabyValidity;
1473 49 : if (pabyValidity == nullptr)
1474 0 : return 0;
1475 : }
1476 57 : UnsetBit(pabyValidity, iFeat);
1477 : }
1478 : }
1479 904 : panOffsets[nFeatCount] = nOffset;
1480 :
1481 : GByte *pabyValues =
1482 904 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1483 904 : if (pabyValues == nullptr)
1484 0 : return 0;
1485 904 : psChild->buffers[2] = pabyValues;
1486 :
1487 904 : nOffset = 0;
1488 4361 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1489 : {
1490 3457 : const size_t nLen =
1491 3457 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1492 3457 : if (nLen)
1493 : {
1494 3400 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1495 3400 : memcpy(pabyValues + nOffset, psRawField->Binary.paData, nLen);
1496 3400 : nOffset += static_cast<T>(nLen);
1497 : }
1498 : }
1499 :
1500 904 : return nFeatCount;
1501 : }
1502 :
1503 : /************************************************************************/
1504 : /* FillFixedWidthBinaryArray() */
1505 : /************************************************************************/
1506 :
1507 : static bool
1508 8 : FillFixedWidthBinaryArray(struct ArrowArray *psChild,
1509 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1510 : const size_t nFeatureCountLimit,
1511 : const bool bIsNullable, const int nWidth, const int i)
1512 : {
1513 8 : psChild->n_buffers = 2;
1514 8 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1515 8 : uint8_t *pabyValidity = nullptr;
1516 :
1517 8 : assert(nFeatureCountLimit + 1 <=
1518 : std::numeric_limits<size_t>::max() / nWidth);
1519 : GByte *pabyValues = static_cast<GByte *>(
1520 8 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 1) * nWidth));
1521 8 : if (pabyValues == nullptr)
1522 0 : return false;
1523 8 : psChild->buffers[1] = pabyValues;
1524 :
1525 29 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1526 : {
1527 21 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1528 21 : if (IsValidField(psRawField))
1529 : {
1530 20 : const auto nLen = psRawField->Binary.nCount;
1531 20 : if (nLen < nWidth)
1532 : {
1533 0 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1534 : nLen);
1535 0 : memset(pabyValues + iFeat * nWidth + nLen, 0, nWidth - nLen);
1536 : }
1537 : else
1538 : {
1539 20 : memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1540 : nWidth);
1541 : }
1542 : }
1543 : else
1544 : {
1545 1 : memset(pabyValues + iFeat * nWidth, 0, nWidth);
1546 1 : if (bIsNullable)
1547 : {
1548 1 : ++psChild->null_count;
1549 1 : if (pabyValidity == nullptr)
1550 : {
1551 1 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1552 1 : psChild->buffers[0] = pabyValidity;
1553 1 : if (pabyValidity == nullptr)
1554 0 : return false;
1555 : }
1556 1 : UnsetBit(pabyValidity, iFeat);
1557 : }
1558 : }
1559 : }
1560 :
1561 8 : return true;
1562 : }
1563 :
1564 : /************************************************************************/
1565 : /* FillWKBGeometryArray() */
1566 : /************************************************************************/
1567 :
1568 : template <class T>
1569 : static size_t
1570 1234 : FillWKBGeometryArray(struct ArrowArray *psChild,
1571 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1572 : const size_t nFeatureCountLimit,
1573 : const OGRGeomFieldDefn *poFieldDefn, const int i,
1574 : const size_t nMemLimit)
1575 : {
1576 1234 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
1577 1234 : psChild->n_buffers = 3;
1578 1234 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1579 1234 : uint8_t *pabyValidity = nullptr;
1580 : T *panOffsets = static_cast<T *>(
1581 1234 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1582 1234 : if (panOffsets == nullptr)
1583 0 : return 0;
1584 1234 : psChild->buffers[1] = panOffsets;
1585 1234 : const auto eGeomType = poFieldDefn->GetType();
1586 3702 : auto poEmptyGeom =
1587 : std::unique_ptr<OGRGeometry>(OGRGeometryFactory::createGeometry(
1588 1234 : (eGeomType == wkbNone || wkbFlatten(eGeomType) == wkbUnknown)
1589 : ? wkbGeometryCollection
1590 : : eGeomType));
1591 :
1592 1234 : size_t nOffset = 0;
1593 1234 : size_t nFeatCount = 0;
1594 14325 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1595 : {
1596 13092 : panOffsets[iFeat] = static_cast<T>(nOffset);
1597 13092 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1598 13092 : if (poGeom != nullptr)
1599 : {
1600 12539 : const size_t nLen = poGeom->WkbSize();
1601 12539 : if (nLen > nMemLimit - nOffset)
1602 : {
1603 1 : if (nFeatCount == 0)
1604 0 : return 0;
1605 1 : break;
1606 : }
1607 12538 : nOffset += static_cast<T>(nLen);
1608 : }
1609 553 : else if (bIsNullable)
1610 : {
1611 553 : ++psChild->null_count;
1612 553 : if (pabyValidity == nullptr)
1613 : {
1614 271 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1615 271 : psChild->buffers[0] = pabyValidity;
1616 271 : if (pabyValidity == nullptr)
1617 0 : return 0;
1618 : }
1619 553 : UnsetBit(pabyValidity, iFeat);
1620 : }
1621 0 : else if (poEmptyGeom)
1622 : {
1623 0 : const size_t nLen = poEmptyGeom->WkbSize();
1624 0 : if (nLen > nMemLimit - nOffset)
1625 : {
1626 0 : if (nFeatCount == 0)
1627 0 : return 0;
1628 0 : break;
1629 : }
1630 0 : nOffset += static_cast<T>(nLen);
1631 : }
1632 : }
1633 1234 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1634 :
1635 : GByte *pabyValues =
1636 1234 : static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1637 1234 : if (pabyValues == nullptr)
1638 0 : return 0;
1639 1234 : psChild->buffers[2] = pabyValues;
1640 :
1641 1234 : nOffset = 0;
1642 14325 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1643 : {
1644 13091 : const size_t nLen =
1645 13091 : static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1646 13091 : if (nLen)
1647 : {
1648 12538 : const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1649 12538 : poGeom->exportToWkb(wkbNDR, pabyValues + nOffset, wkbVariantIso);
1650 12538 : nOffset += nLen;
1651 : }
1652 553 : else if (!bIsNullable && poEmptyGeom)
1653 : {
1654 0 : poEmptyGeom->exportToWkb(wkbNDR, pabyValues + nOffset,
1655 : wkbVariantIso);
1656 0 : nOffset += nLen;
1657 : }
1658 : }
1659 :
1660 1234 : return nFeatCount;
1661 : }
1662 :
1663 : /************************************************************************/
1664 : /* FillDateArray() */
1665 : /************************************************************************/
1666 :
1667 125 : static bool FillDateArray(struct ArrowArray *psChild,
1668 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1669 : const size_t nFeatureCountLimit,
1670 : const bool bIsNullable, const int i)
1671 : {
1672 125 : psChild->n_buffers = 2;
1673 125 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1674 125 : uint8_t *pabyValidity = nullptr;
1675 125 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1676 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1677 125 : if (panValues == nullptr)
1678 0 : return false;
1679 125 : psChild->buffers[1] = panValues;
1680 475 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1681 : {
1682 350 : auto &poFeature = apoFeatures[iFeat];
1683 350 : const auto psRawField = poFeature->GetRawFieldRef(i);
1684 350 : if (IsValidField(psRawField))
1685 : {
1686 : struct tm brokenDown;
1687 262 : memset(&brokenDown, 0, sizeof(brokenDown));
1688 262 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1689 262 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1690 262 : brokenDown.tm_mday = psRawField->Date.Day;
1691 262 : panValues[iFeat] =
1692 262 : static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
1693 : }
1694 88 : else if (bIsNullable)
1695 : {
1696 88 : panValues[iFeat] = 0;
1697 88 : ++psChild->null_count;
1698 88 : if (pabyValidity == nullptr)
1699 : {
1700 61 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1701 61 : psChild->buffers[0] = pabyValidity;
1702 61 : if (pabyValidity == nullptr)
1703 0 : return false;
1704 : }
1705 88 : UnsetBit(pabyValidity, iFeat);
1706 : }
1707 : else
1708 : {
1709 0 : panValues[iFeat] = 0;
1710 : }
1711 : }
1712 125 : return true;
1713 : }
1714 :
1715 : /************************************************************************/
1716 : /* FillTimeArray() */
1717 : /************************************************************************/
1718 :
1719 72 : static bool FillTimeArray(struct ArrowArray *psChild,
1720 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1721 : const size_t nFeatureCountLimit,
1722 : const bool bIsNullable, const int i)
1723 : {
1724 72 : psChild->n_buffers = 2;
1725 72 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1726 72 : uint8_t *pabyValidity = nullptr;
1727 72 : int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1728 : sizeof(int32_t) * (nFeatureCountLimit + 1)));
1729 72 : if (panValues == nullptr)
1730 0 : return false;
1731 72 : psChild->buffers[1] = panValues;
1732 667 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1733 : {
1734 595 : auto &poFeature = apoFeatures[iFeat];
1735 595 : const auto psRawField = poFeature->GetRawFieldRef(i);
1736 595 : if (IsValidField(psRawField))
1737 : {
1738 548 : panValues[iFeat] =
1739 548 : psRawField->Date.Hour * 3600000 +
1740 548 : psRawField->Date.Minute * 60000 +
1741 548 : static_cast<int>(psRawField->Date.Second * 1000 + 0.5f);
1742 : }
1743 47 : else if (bIsNullable)
1744 : {
1745 47 : panValues[iFeat] = 0;
1746 47 : ++psChild->null_count;
1747 47 : if (pabyValidity == nullptr)
1748 : {
1749 39 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1750 39 : psChild->buffers[0] = pabyValidity;
1751 39 : if (pabyValidity == nullptr)
1752 0 : return false;
1753 : }
1754 47 : UnsetBit(pabyValidity, iFeat);
1755 : }
1756 : else
1757 : {
1758 0 : panValues[iFeat] = 0;
1759 : }
1760 : }
1761 72 : return true;
1762 : }
1763 :
1764 : /************************************************************************/
1765 : /* FillDateTimeArray() */
1766 : /************************************************************************/
1767 :
1768 : static bool
1769 712 : FillDateTimeArray(struct ArrowArray *psChild,
1770 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1771 : const size_t nFeatureCountLimit, const bool bIsNullable,
1772 : const int i, int nFieldTZFlag)
1773 : {
1774 712 : psChild->n_buffers = 2;
1775 712 : psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1776 712 : uint8_t *pabyValidity = nullptr;
1777 712 : int64_t *panValues = static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1778 : sizeof(int64_t) * (nFeatureCountLimit + 1)));
1779 712 : if (panValues == nullptr)
1780 0 : return false;
1781 712 : psChild->buffers[1] = panValues;
1782 : struct tm brokenDown;
1783 712 : memset(&brokenDown, 0, sizeof(brokenDown));
1784 3141 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1785 : {
1786 2429 : auto &poFeature = apoFeatures[iFeat];
1787 2429 : const auto psRawField = poFeature->GetRawFieldRef(i);
1788 2429 : if (IsValidField(psRawField))
1789 : {
1790 1670 : brokenDown.tm_year = psRawField->Date.Year - 1900;
1791 1670 : brokenDown.tm_mon = psRawField->Date.Month - 1;
1792 1670 : brokenDown.tm_mday = psRawField->Date.Day;
1793 1670 : brokenDown.tm_hour = psRawField->Date.Hour;
1794 1670 : brokenDown.tm_min = psRawField->Date.Minute;
1795 1670 : brokenDown.tm_sec = static_cast<int>(psRawField->Date.Second);
1796 : auto nVal =
1797 1670 : CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
1798 1670 : (static_cast<int>(psRawField->Date.Second * 1000 + 0.5f) %
1799 1670 : 1000);
1800 1670 : if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
1801 65 : psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1802 : {
1803 : // Convert for psRawField->Date.TZFlag to UTC
1804 65 : const int TZOffset =
1805 65 : (psRawField->Date.TZFlag - OGR_TZFLAG_UTC) * 15;
1806 65 : const int TZOffsetMS = TZOffset * 60 * 1000;
1807 65 : nVal -= TZOffsetMS;
1808 : }
1809 1670 : panValues[iFeat] = nVal;
1810 : }
1811 759 : else if (bIsNullable)
1812 : {
1813 759 : panValues[iFeat] = 0;
1814 759 : ++psChild->null_count;
1815 759 : if (pabyValidity == nullptr)
1816 : {
1817 261 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1818 261 : psChild->buffers[0] = pabyValidity;
1819 261 : if (pabyValidity == nullptr)
1820 0 : return false;
1821 : }
1822 759 : UnsetBit(pabyValidity, iFeat);
1823 : }
1824 : else
1825 : {
1826 0 : panValues[iFeat] = 0;
1827 : }
1828 : }
1829 712 : return true;
1830 : }
1831 :
1832 : /************************************************************************/
1833 : /* FillDateTimeArrayAsString() */
1834 : /************************************************************************/
1835 :
1836 : static size_t
1837 8 : FillDateTimeArrayAsString(struct ArrowArray *psChild,
1838 : std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1839 : const size_t nFeatureCountLimit,
1840 : const bool bIsNullable, const int i,
1841 : const size_t nMemLimit)
1842 : {
1843 8 : psChild->n_buffers = 3;
1844 8 : psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1845 8 : uint8_t *pabyValidity = nullptr;
1846 : using T = uint32_t;
1847 : T *panOffsets = static_cast<T *>(
1848 8 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1849 8 : if (panOffsets == nullptr)
1850 0 : return 0;
1851 8 : psChild->buffers[1] = panOffsets;
1852 :
1853 8 : size_t nOffset = 0;
1854 8 : size_t nFeatCount = 0;
1855 46 : for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1856 : {
1857 38 : panOffsets[iFeat] = static_cast<T>(nOffset);
1858 38 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1859 38 : if (IsValidField(psRawField))
1860 : {
1861 36 : size_t nLen = strlen("YYYY-MM-DDTHH:MM:SS");
1862 36 : if (fmodf(psRawField->Date.Second, 1.0f) != 0)
1863 27 : nLen += strlen(".sss");
1864 36 : if (psRawField->Date.TZFlag == OGR_TZFLAG_UTC)
1865 7 : nLen += 1; // 'Z'
1866 29 : else if (psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1867 12 : nLen += strlen("+hh:mm");
1868 36 : if (nLen > nMemLimit - nOffset)
1869 : {
1870 0 : if (nFeatCount == 0)
1871 0 : return 0;
1872 0 : break;
1873 : }
1874 36 : nOffset += static_cast<T>(nLen);
1875 : }
1876 2 : else if (bIsNullable)
1877 : {
1878 2 : ++psChild->null_count;
1879 2 : if (pabyValidity == nullptr)
1880 : {
1881 2 : pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1882 2 : psChild->buffers[0] = pabyValidity;
1883 2 : if (pabyValidity == nullptr)
1884 0 : return 0;
1885 : }
1886 2 : UnsetBit(pabyValidity, iFeat);
1887 : }
1888 : }
1889 8 : panOffsets[nFeatCount] = static_cast<T>(nOffset);
1890 :
1891 : char *pachValues =
1892 8 : static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1893 8 : if (pachValues == nullptr)
1894 0 : return 0;
1895 8 : psChild->buffers[2] = pachValues;
1896 :
1897 8 : nOffset = 0;
1898 : char szBuffer[OGR_SIZEOF_ISO8601_DATETIME_BUFFER];
1899 : OGRISO8601Format sFormat;
1900 8 : sFormat.ePrecision = OGRISO8601Precision::AUTO;
1901 46 : for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1902 : {
1903 38 : const int nLen =
1904 38 : static_cast<int>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1905 38 : if (nLen)
1906 : {
1907 36 : const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1908 36 : int nBufSize = OGRGetISO8601DateTime(psRawField, sFormat, szBuffer);
1909 36 : if (nBufSize)
1910 : {
1911 36 : memcpy(pachValues + nOffset, szBuffer,
1912 36 : std::min(nLen, nBufSize));
1913 : }
1914 36 : if (nBufSize < nLen)
1915 : {
1916 5 : memset(pachValues + nOffset + nBufSize, 0, nLen - nBufSize);
1917 : }
1918 36 : nOffset += nLen;
1919 : }
1920 : }
1921 :
1922 8 : return nFeatCount;
1923 : }
1924 :
1925 : /************************************************************************/
1926 : /* GetNextArrowArray() */
1927 : /************************************************************************/
1928 :
1929 : /** Default implementation of the ArrowArrayStream::get_next() callback.
1930 : *
1931 : * To be used by driver implementations that have a custom GetArrowStream()
1932 : * implementation.
1933 : *
1934 : * @since GDAL 3.6
1935 : */
1936 3568 : int OGRLayer::GetNextArrowArray(struct ArrowArrayStream *stream,
1937 : struct ArrowArray *out_array)
1938 : {
1939 3568 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
1940 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
1941 : stream->private_data);
1942 :
1943 3568 : const bool bIncludeFID = CPLTestBool(
1944 : m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
1945 3568 : const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
1946 : GAS_OPT_DATETIME_AS_STRING, false);
1947 3568 : int nMaxBatchSize = atoi(m_aosArrowArrayStreamOptions.FetchNameValueDef(
1948 : "MAX_FEATURES_IN_BATCH", "65536"));
1949 3568 : if (nMaxBatchSize <= 0)
1950 0 : nMaxBatchSize = 1;
1951 3568 : if (nMaxBatchSize > INT_MAX - 1)
1952 0 : nMaxBatchSize = INT_MAX - 1;
1953 :
1954 : auto &oFeatureQueue =
1955 3568 : m_poSharedArrowArrayStreamPrivateData->m_oFeatureQueue;
1956 :
1957 3568 : memset(out_array, 0, sizeof(*out_array));
1958 :
1959 3568 : auto poLayerDefn = GetLayerDefn();
1960 3568 : const int nFieldCount = poLayerDefn->GetFieldCount();
1961 3568 : const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
1962 3568 : const int nMaxChildren =
1963 3568 : (bIncludeFID ? 1 : 0) + nFieldCount + nGeomFieldCount;
1964 3568 : int iSchemaChild = 0;
1965 :
1966 3568 : if (!m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.empty())
1967 : {
1968 6 : if (poPrivate->poShared->m_bEOF)
1969 : {
1970 2 : return 0;
1971 : }
1972 4 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS == 0)
1973 : {
1974 4 : CPLDebug("OGR", "Using fast FID filtering");
1975 : }
1976 8 : while (
1977 24 : oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize) &&
1978 12 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS <
1979 12 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
1980 : {
1981 : const auto nFID =
1982 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
1983 8 : [m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS];
1984 16 : auto poFeature = std::unique_ptr<OGRFeature>(GetFeature(nFID));
1985 8 : ++m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS;
1986 8 : if (poFeature && (m_poFilterGeom == nullptr ||
1987 0 : FilterGeometry(poFeature->GetGeomFieldRef(
1988 8 : m_iGeomFieldFilter))))
1989 : {
1990 4 : oFeatureQueue.emplace_back(std::move(poFeature));
1991 : }
1992 : }
1993 8 : if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS ==
1994 4 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
1995 : {
1996 4 : poPrivate->poShared->m_bEOF = true;
1997 : }
1998 : }
1999 3562 : else if (!poPrivate->poShared->m_bEOF)
2000 : {
2001 18852 : while (oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize))
2002 : {
2003 18849 : auto poFeature = std::unique_ptr<OGRFeature>(GetNextFeature());
2004 18849 : if (!poFeature)
2005 : {
2006 1835 : poPrivate->poShared->m_bEOF = true;
2007 1835 : break;
2008 : }
2009 17014 : oFeatureQueue.emplace_back(std::move(poFeature));
2010 : }
2011 : }
2012 3566 : if (oFeatureQueue.empty())
2013 : {
2014 2112 : return 0;
2015 : }
2016 :
2017 1454 : out_array->release = OGRLayerDefaultReleaseArray;
2018 1454 : out_array->null_count = 0;
2019 :
2020 1454 : out_array->n_children = nMaxChildren;
2021 1454 : out_array->children = static_cast<struct ArrowArray **>(
2022 1454 : CPLCalloc(nMaxChildren, sizeof(struct ArrowArray *)));
2023 1454 : out_array->release = OGRLayerDefaultReleaseArray;
2024 1454 : out_array->n_buffers = 1;
2025 1454 : out_array->buffers =
2026 1454 : static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
2027 :
2028 1454 : size_t nFeatureCount = oFeatureQueue.size();
2029 1454 : const uint32_t nMemLimit = OGRArrowArrayHelper::GetMemLimit();
2030 1454 : if (bIncludeFID)
2031 : {
2032 2652 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2033 1326 : CPLCalloc(1, sizeof(struct ArrowArray)));
2034 1326 : auto psChild = out_array->children[iSchemaChild];
2035 1326 : ++iSchemaChild;
2036 1326 : psChild->release = OGRLayerDefaultReleaseArray;
2037 1326 : psChild->n_buffers = 2;
2038 1326 : psChild->buffers =
2039 1326 : static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
2040 : int64_t *panValues =
2041 1326 : static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
2042 : sizeof(int64_t) * (oFeatureQueue.size() + 1)));
2043 1326 : if (panValues == nullptr)
2044 0 : goto error;
2045 1326 : psChild->buffers[1] = panValues;
2046 17914 : for (size_t iFeat = 0; iFeat < oFeatureQueue.size(); ++iFeat)
2047 : {
2048 16588 : panValues[iFeat] = oFeatureQueue[iFeat]->GetFID();
2049 : }
2050 : }
2051 :
2052 13642 : for (int i = 0; i < nFieldCount; ++i)
2053 : {
2054 12213 : const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
2055 12213 : if (poFieldDefn->IsIgnored())
2056 : {
2057 13 : continue;
2058 : }
2059 :
2060 24400 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2061 12200 : CPLCalloc(1, sizeof(struct ArrowArray)));
2062 12200 : auto psChild = out_array->children[iSchemaChild];
2063 12200 : ++iSchemaChild;
2064 12200 : psChild->release = OGRLayerDefaultReleaseArray;
2065 12200 : const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
2066 12200 : const auto eSubType = poFieldDefn->GetSubType();
2067 12200 : switch (poFieldDefn->GetType())
2068 : {
2069 3514 : case OFTInteger:
2070 : {
2071 3514 : if (eSubType == OFSTBoolean)
2072 : {
2073 138 : if (!FillBoolArray(psChild, oFeatureQueue, nFeatureCount,
2074 : bIsNullable, &OGRField::Integer, i))
2075 0 : goto error;
2076 : }
2077 3376 : else if (eSubType == OFSTInt16)
2078 : {
2079 478 : if (!FillArray<int16_t>(psChild, oFeatureQueue,
2080 : nFeatureCount, bIsNullable,
2081 : &OGRField::Integer, i))
2082 0 : goto error;
2083 : }
2084 : else
2085 : {
2086 2898 : if (!FillArray<int32_t>(psChild, oFeatureQueue,
2087 : nFeatureCount, bIsNullable,
2088 : &OGRField::Integer, i))
2089 0 : goto error;
2090 : }
2091 :
2092 3514 : const auto &osDomainName = poFieldDefn->GetDomainName();
2093 3514 : if (!osDomainName.empty())
2094 : {
2095 13 : auto poDS = GetDataset();
2096 13 : if (poDS)
2097 : {
2098 : const auto poFieldDomain =
2099 13 : poDS->GetFieldDomain(osDomainName);
2100 26 : if (poFieldDomain &&
2101 13 : poFieldDomain->GetDomainType() == OFDT_CODED)
2102 : {
2103 13 : const OGRCodedFieldDomain *poCodedDomain =
2104 : static_cast<const OGRCodedFieldDomain *>(
2105 : poFieldDomain);
2106 13 : OGRArrowArrayHelper::FillDict(psChild,
2107 : poCodedDomain);
2108 : }
2109 : }
2110 : }
2111 :
2112 3514 : break;
2113 : }
2114 :
2115 333 : case OFTInteger64:
2116 : {
2117 333 : if (!FillArray<int64_t>(psChild, oFeatureQueue, nFeatureCount,
2118 : bIsNullable, &OGRField::Integer64, i))
2119 0 : goto error;
2120 333 : break;
2121 : }
2122 :
2123 2093 : case OFTReal:
2124 : {
2125 2093 : if (eSubType == OFSTFloat32)
2126 : {
2127 478 : if (!FillArray<float>(psChild, oFeatureQueue, nFeatureCount,
2128 : bIsNullable, &OGRField::Real, i))
2129 0 : goto error;
2130 : }
2131 : else
2132 : {
2133 1615 : if (!FillArray<double>(psChild, oFeatureQueue,
2134 : nFeatureCount, bIsNullable,
2135 : &OGRField::Real, i))
2136 0 : goto error;
2137 : }
2138 2093 : break;
2139 : }
2140 :
2141 3762 : case OFTString:
2142 : case OFTWideString:
2143 : {
2144 3762 : const size_t nThisFeatureCount = FillStringArray<int32_t>(
2145 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2146 : nMemLimit);
2147 3762 : if (nThisFeatureCount == 0)
2148 : {
2149 19 : goto error_max_mem;
2150 : }
2151 3743 : if (nThisFeatureCount < nFeatureCount)
2152 1 : nFeatureCount = nThisFeatureCount;
2153 3743 : break;
2154 : }
2155 :
2156 913 : case OFTBinary:
2157 : {
2158 913 : const int nWidth = poFieldDefn->GetWidth();
2159 913 : if (nWidth > 0)
2160 : {
2161 8 : if (nFeatureCount > nMemLimit / nWidth)
2162 : {
2163 1 : nFeatureCount = nMemLimit / nWidth;
2164 1 : if (nFeatureCount == 0)
2165 0 : goto error_max_mem;
2166 : }
2167 8 : if (!FillFixedWidthBinaryArray(psChild, oFeatureQueue,
2168 : nFeatureCount, bIsNullable,
2169 : nWidth, i))
2170 0 : goto error;
2171 : }
2172 : else
2173 : {
2174 905 : const size_t nThisFeatureCount = FillBinaryArray<int32_t>(
2175 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2176 : nMemLimit);
2177 905 : if (nThisFeatureCount == 0)
2178 : {
2179 1 : goto error_max_mem;
2180 : }
2181 904 : if (nThisFeatureCount < nFeatureCount)
2182 1 : nFeatureCount = nThisFeatureCount;
2183 : }
2184 912 : break;
2185 : }
2186 :
2187 234 : case OFTIntegerList:
2188 : {
2189 : size_t nThisFeatureCount;
2190 234 : if (eSubType == OFSTBoolean)
2191 : {
2192 : nThisFeatureCount =
2193 49 : FillListArrayBool<int32_t, GetFromIntegerList>(
2194 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2195 : i, nMemLimit);
2196 : }
2197 185 : else if (eSubType == OFSTInt16)
2198 : {
2199 : nThisFeatureCount =
2200 28 : FillListArray<int32_t, int16_t, GetFromIntegerList>(
2201 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2202 : i, nMemLimit);
2203 : }
2204 : else
2205 : {
2206 : nThisFeatureCount =
2207 157 : FillListArray<int32_t, int32_t, GetFromIntegerList>(
2208 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2209 : i, nMemLimit);
2210 : }
2211 234 : if (nThisFeatureCount == 0)
2212 : {
2213 2 : goto error_max_mem;
2214 : }
2215 232 : if (nThisFeatureCount < nFeatureCount)
2216 2 : nFeatureCount = nThisFeatureCount;
2217 232 : break;
2218 : }
2219 :
2220 75 : case OFTInteger64List:
2221 : {
2222 : const size_t nThisFeatureCount =
2223 75 : FillListArray<int32_t, int64_t, GetFromInteger64List>(
2224 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2225 : nMemLimit);
2226 75 : if (nThisFeatureCount == 0)
2227 : {
2228 1 : goto error_max_mem;
2229 : }
2230 74 : if (nThisFeatureCount < nFeatureCount)
2231 1 : nFeatureCount = nThisFeatureCount;
2232 74 : break;
2233 : }
2234 :
2235 156 : case OFTRealList:
2236 : {
2237 : size_t nThisFeatureCount;
2238 156 : if (eSubType == OFSTFloat32)
2239 : {
2240 : nThisFeatureCount =
2241 41 : FillListArray<int32_t, float, GetFromRealList>(
2242 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2243 : i, nMemLimit);
2244 : }
2245 : else
2246 : {
2247 : nThisFeatureCount =
2248 115 : FillListArray<int32_t, double, GetFromRealList>(
2249 : psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2250 : i, nMemLimit);
2251 : }
2252 156 : if (nThisFeatureCount == 0)
2253 : {
2254 1 : goto error_max_mem;
2255 : }
2256 155 : if (nThisFeatureCount < nFeatureCount)
2257 1 : nFeatureCount = nThisFeatureCount;
2258 155 : break;
2259 : }
2260 :
2261 203 : case OFTStringList:
2262 : case OFTWideStringList:
2263 : {
2264 203 : const size_t nThisFeatureCount = FillStringListArray<int32_t>(
2265 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2266 : nMemLimit);
2267 203 : if (nThisFeatureCount == 0)
2268 : {
2269 1 : goto error_max_mem;
2270 : }
2271 202 : if (nThisFeatureCount < nFeatureCount)
2272 1 : nFeatureCount = nThisFeatureCount;
2273 202 : break;
2274 : }
2275 :
2276 125 : case OFTDate:
2277 : {
2278 125 : if (!FillDateArray(psChild, oFeatureQueue, nFeatureCount,
2279 : bIsNullable, i))
2280 0 : goto error;
2281 125 : break;
2282 : }
2283 :
2284 72 : case OFTTime:
2285 : {
2286 72 : if (!FillTimeArray(psChild, oFeatureQueue, nFeatureCount,
2287 : bIsNullable, i))
2288 0 : goto error;
2289 72 : break;
2290 : }
2291 :
2292 720 : case OFTDateTime:
2293 : {
2294 720 : if (bDateTimeAsString)
2295 : {
2296 8 : const size_t nThisFeatureCount = FillDateTimeArrayAsString(
2297 : psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2298 : nMemLimit);
2299 8 : if (nThisFeatureCount == 0)
2300 : {
2301 0 : goto error_max_mem;
2302 : }
2303 8 : if (nThisFeatureCount < nFeatureCount)
2304 0 : nFeatureCount = nThisFeatureCount;
2305 : }
2306 : else
2307 : {
2308 712 : if (!FillDateTimeArray(psChild, oFeatureQueue,
2309 : nFeatureCount, bIsNullable, i,
2310 : poFieldDefn->GetTZFlag()))
2311 0 : goto error;
2312 : }
2313 720 : break;
2314 : }
2315 : }
2316 : }
2317 2666 : for (int i = 0; i < nGeomFieldCount; ++i)
2318 : {
2319 1237 : const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
2320 1237 : if (poFieldDefn->IsIgnored())
2321 : {
2322 3 : continue;
2323 : }
2324 :
2325 2468 : out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2326 1234 : CPLCalloc(1, sizeof(struct ArrowArray)));
2327 1234 : auto psChild = out_array->children[iSchemaChild];
2328 1234 : ++iSchemaChild;
2329 1234 : psChild->release = OGRLayerDefaultReleaseArray;
2330 1234 : psChild->length = oFeatureQueue.size();
2331 1234 : const size_t nThisFeatureCount = FillWKBGeometryArray<int32_t>(
2332 : psChild, oFeatureQueue, nFeatureCount, poFieldDefn, i, nMemLimit);
2333 1234 : if (nThisFeatureCount == 0)
2334 : {
2335 0 : goto error_max_mem;
2336 : }
2337 1234 : if (nThisFeatureCount < nFeatureCount)
2338 1 : nFeatureCount = nThisFeatureCount;
2339 : }
2340 :
2341 : // Remove consumed features from the queue
2342 1429 : if (nFeatureCount == oFeatureQueue.size())
2343 1420 : oFeatureQueue.clear();
2344 : else
2345 : {
2346 27 : for (size_t i = 0; i < nFeatureCount; ++i)
2347 : {
2348 18 : oFeatureQueue.pop_front();
2349 : }
2350 : }
2351 :
2352 1429 : out_array->n_children = iSchemaChild;
2353 1429 : out_array->length = nFeatureCount;
2354 16060 : for (int i = 0; i < out_array->n_children; ++i)
2355 : {
2356 14631 : out_array->children[i]->length = nFeatureCount;
2357 : }
2358 :
2359 1429 : return 0;
2360 :
2361 25 : error_max_mem:
2362 25 : CPLError(CE_Failure, CPLE_AppDefined,
2363 : "Too large feature: not even a single feature can be returned");
2364 25 : error:
2365 25 : oFeatureQueue.clear();
2366 25 : poPrivate->poShared->m_bEOF = true;
2367 25 : out_array->release(out_array);
2368 25 : memset(out_array, 0, sizeof(*out_array));
2369 25 : return ENOMEM;
2370 : }
2371 :
2372 : /************************************************************************/
2373 : /* StaticGetNextArrowArray() */
2374 : /************************************************************************/
2375 :
2376 : /** Default implementation of the ArrowArrayStream::get_next() callback.
2377 : *
2378 : * To be used by driver implementations that have a custom GetArrowStream()
2379 : * implementation.
2380 : *
2381 : * @since GDAL 3.6
2382 : */
2383 4546 : int OGRLayer::StaticGetNextArrowArray(struct ArrowArrayStream *stream,
2384 : struct ArrowArray *out_array)
2385 : {
2386 : auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2387 4546 : stream->private_data)
2388 4546 : ->poShared->m_poLayer;
2389 4546 : if (poLayer == nullptr)
2390 : {
2391 1 : CPLError(CE_Failure, CPLE_NotSupported,
2392 : "Calling get_next() on a freed OGRLayer is not supported");
2393 1 : return EINVAL;
2394 : }
2395 4545 : return poLayer->GetNextArrowArray(stream, out_array);
2396 : }
2397 :
2398 : /************************************************************************/
2399 : /* ReleaseStream() */
2400 : /************************************************************************/
2401 :
2402 : /** Release a ArrowArrayStream.
2403 : *
2404 : * To be used by driver implementations that have a custom GetArrowStream()
2405 : * implementation.
2406 : *
2407 : * @param stream Arrow array stream to release.
2408 : * @since GDAL 3.6
2409 : */
2410 2235 : void OGRLayer::ReleaseStream(struct ArrowArrayStream *stream)
2411 : {
2412 2235 : assert(stream->release == OGRLayer::ReleaseStream);
2413 2235 : ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
2414 : static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2415 : stream->private_data);
2416 2235 : poPrivate->poShared->m_bArrowArrayStreamInProgress = false;
2417 2235 : poPrivate->poShared->m_bEOF = false;
2418 2235 : if (poPrivate->poShared->m_poLayer)
2419 2185 : poPrivate->poShared->m_poLayer->ResetReading();
2420 2235 : delete poPrivate;
2421 2235 : stream->private_data = nullptr;
2422 2235 : stream->release = nullptr;
2423 2235 : }
2424 :
2425 : /************************************************************************/
2426 : /* GetLastErrorArrowArrayStream() */
2427 : /************************************************************************/
2428 :
2429 : /** Default implementation of the ArrowArrayStream::get_last_error() callback.
2430 : *
2431 : * To be used by driver implementations that have a custom GetArrowStream()
2432 : * implementation.
2433 : *
2434 : * @since GDAL 3.6
2435 : */
2436 3 : const char *OGRLayer::GetLastErrorArrowArrayStream(struct ArrowArrayStream *)
2437 : {
2438 3 : const char *pszLastErrorMsg = CPLGetLastErrorMsg();
2439 3 : return pszLastErrorMsg[0] != '\0' ? pszLastErrorMsg : nullptr;
2440 : }
2441 :
2442 : /************************************************************************/
2443 : /* GetArrowStream() */
2444 : /************************************************************************/
2445 :
2446 : /** Get a Arrow C stream.
2447 : *
2448 : * On successful return, and when the stream interfaces is no longer needed, it
2449 : * must must be freed with out_stream->release(out_stream). Please carefully
2450 : * read https://arrow.apache.org/docs/format/CStreamInterface.html for more
2451 : * details on using Arrow C stream.
2452 : *
2453 : * The method may take into account ignored fields set with SetIgnoredFields()
2454 : * (the default implementation does), and should take into account filters set
2455 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2456 : * specialized implementations may fallback to the default (slower)
2457 : * implementation when filters are set.
2458 : * Drivers that have a specialized implementation should advertise the
2459 : * OLCFastGetArrowStream capability.
2460 : *
2461 : * There are extra precautions to take into account in a OGR context. Unless
2462 : * otherwise specified by a particular driver implementation, the get_schema(),
2463 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2464 : * structure should no longer be used after the OGRLayer, from which the
2465 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2466 : * dataset closing). The reason is that those function pointers will typically
2467 : * point to methods of the OGRLayer instance.
2468 : * However, the ArrowSchema and ArrowArray structures filled from those
2469 : * callbacks can be used and must be released independently from the
2470 : * ArrowArrayStream or the layer.
2471 : *
2472 : * Furthermore, unless otherwise specified by a particular driver
2473 : * implementation, only one ArrowArrayStream can be active at a time on
2474 : * a given layer (that is the last active one must be explicitly released before
2475 : * a next one is asked). Changing filter state, ignored columns, modifying the
2476 : * schema or using ResetReading()/GetNextFeature() while using a
2477 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2478 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2479 : * should be called on a layer, while an ArrowArrayStream on it is active.
2480 : *
2481 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2482 : * get_schema() callback may be set with the potential following items:
2483 : * <ul>
2484 : * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2485 : * Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2486 : * specified.</li>
2487 : * <li>"GDAL:OGR:alternative_name": value of
2488 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2489 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2490 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2491 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2492 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2493 : * string)</li>
2494 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2495 : * "true" or "false")</li>
2496 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2497 : * </ul>
2498 : *
2499 : * A potential usage can be:
2500 : \code{.cpp}
2501 : struct ArrowArrayStream stream;
2502 : if( !poLayer->GetArrowStream(&stream, nullptr))
2503 : {
2504 : CPLError(CE_Failure, CPLE_AppDefined, "GetArrowStream() failed\n");
2505 : exit(1);
2506 : }
2507 : struct ArrowSchema schema;
2508 : if( stream.get_schema(&stream, &schema) == 0 )
2509 : {
2510 : // Do something useful
2511 : schema.release(schema);
2512 : }
2513 : while( true )
2514 : {
2515 : struct ArrowArray array;
2516 : // Look for an error (get_next() returning a non-zero code), or
2517 : // end of iteration (array.release == nullptr)
2518 : if( stream.get_next(&stream, &array) != 0 ||
2519 : array.release == nullptr )
2520 : {
2521 : break;
2522 : }
2523 : // Do something useful
2524 : array.release(&array);
2525 : }
2526 : stream.release(&stream);
2527 : \endcode
2528 : *
2529 : * A full example is available in the
2530 : * <a
2531 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2532 : From OGR using the Arrow C Stream data interface</a> tutorial.
2533 : *
2534 : * Options may be driver specific. The default implementation recognizes the
2535 : * following options:
2536 : * <ul>
2537 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to YES.
2538 : * </li>
2539 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2540 : * a ArrowArray batch. Defaults to 65 536.</li>
2541 : * <li>TIMEZONE="unknown", "UTC", "(+|:)HH:MM" or any other value supported by
2542 : * Arrow. (GDAL >= 3.8)
2543 : * Override the timezone flag nominally provided by
2544 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2545 : * declaration, with a user specified timezone.
2546 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2547 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2548 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2549 : * to UTC of a OGRField::Date is only done if both the timezone indicated by
2550 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2551 : * this TIMEZONE option) are not unknown.</li>
2552 : * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2553 : * Whether DateTime fields should be returned as a (normally ISO-8601
2554 : * formatted) string by drivers. The aim is to be able to handle mixed
2555 : * timezones (or timezone naive values) in the same column.
2556 : * All drivers must honour that option, and potentially fallback to the
2557 : * OGRLayer generic implementation if they cannot (which is the case for the
2558 : * Arrow, Parquet and ADBC drivers).
2559 : * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2560 : * </li>
2561 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2562 : * The default is OGC, which will lead to setting
2563 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2564 : * If setting to GEOMETRY_METADATA_ENCODING to GEOARROW,
2565 : * ARROW:extension:name=geoarrow.wkb and
2566 : * ARROW:extension:metadata={"crs": <projjson CRS representation>> are set.
2567 : * </li>
2568 : * </ul>
2569 : *
2570 : * The Arrow/Parquet drivers recognize the following option:
2571 : * <ul>
2572 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2573 : * when the native geometry encoding is not WKB. Otherwise the geometry
2574 : * will be returned with its native Arrow encoding
2575 : * (possibly using GeoArrow encoding).</li>
2576 : * </ul>
2577 : *
2578 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2579 : * structure does not need to be initialized.
2580 : * @param papszOptions NULL terminated list of key=value options.
2581 : * @return true in case of success.
2582 : * @since GDAL 3.6
2583 : */
2584 2239 : bool OGRLayer::GetArrowStream(struct ArrowArrayStream *out_stream,
2585 : CSLConstList papszOptions)
2586 : {
2587 2239 : memset(out_stream, 0, sizeof(*out_stream));
2588 3793 : if (m_poSharedArrowArrayStreamPrivateData &&
2589 3793 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress)
2590 : {
2591 4 : CPLError(CE_Failure, CPLE_AppDefined,
2592 : "An arrow Arrow Stream is in progress on that layer. Only "
2593 : "one at a time is allowed in this implementation.");
2594 4 : return false;
2595 : }
2596 2235 : m_aosArrowArrayStreamOptions.Assign(CSLDuplicate(papszOptions), true);
2597 :
2598 2235 : out_stream->get_schema = OGRLayer::StaticGetArrowSchema;
2599 2235 : out_stream->get_next = OGRLayer::StaticGetNextArrowArray;
2600 2235 : out_stream->get_last_error = OGRLayer::GetLastErrorArrowArrayStream;
2601 2235 : out_stream->release = OGRLayer::ReleaseStream;
2602 :
2603 2235 : if (m_poSharedArrowArrayStreamPrivateData == nullptr)
2604 : {
2605 : m_poSharedArrowArrayStreamPrivateData =
2606 685 : std::make_shared<ArrowArrayStreamPrivateData>();
2607 685 : m_poSharedArrowArrayStreamPrivateData->m_poLayer = this;
2608 : }
2609 2235 : m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress = true;
2610 :
2611 : // Special case for "FID = constant", or "FID IN (constant1, ...., constantN)"
2612 2235 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.clear();
2613 2235 : m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS = 0;
2614 2235 : if (m_poAttrQuery)
2615 : {
2616 : swq_expr_node *poNode =
2617 1062 : static_cast<swq_expr_node *>(m_poAttrQuery->GetSWQExpr());
2618 3186 : if (poNode->eNodeType == SNT_OPERATION &&
2619 1062 : (poNode->nOperation == SWQ_IN || poNode->nOperation == SWQ_EQ) &&
2620 831 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
2621 288 : poNode->papoSubExpr[0]->field_index ==
2622 2133 : GetLayerDefn()->GetFieldCount() + SPF_FID &&
2623 9 : TestCapability(OLCRandomRead))
2624 : {
2625 8 : std::set<GIntBig> oSetAlreadyListed;
2626 13 : for (int i = 1; i < poNode->nSubExprCount; ++i)
2627 : {
2628 27 : if (poNode->papoSubExpr[i]->eNodeType == SNT_CONSTANT &&
2629 18 : poNode->papoSubExpr[i]->field_type == SWQ_INTEGER64 &&
2630 9 : oSetAlreadyListed.find(poNode->papoSubExpr[i]->int_value) ==
2631 18 : oSetAlreadyListed.end())
2632 : {
2633 8 : oSetAlreadyListed.insert(poNode->papoSubExpr[i]->int_value);
2634 8 : m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
2635 8 : .push_back(poNode->papoSubExpr[i]->int_value);
2636 : }
2637 : }
2638 : }
2639 : }
2640 :
2641 2235 : auto poPrivateData = new ArrowArrayStreamPrivateDataSharedDataWrapper();
2642 2235 : poPrivateData->poShared = m_poSharedArrowArrayStreamPrivateData;
2643 2235 : out_stream->private_data = poPrivateData;
2644 2235 : return true;
2645 : }
2646 :
2647 : /************************************************************************/
2648 : /* OGR_L_GetArrowStream() */
2649 : /************************************************************************/
2650 :
2651 : /** Get a Arrow C stream.
2652 : *
2653 : * On successful return, and when the stream interfaces is no longer needed, it
2654 : * must be freed with out_stream->release(out_stream). Please carefully read
2655 : * https://arrow.apache.org/docs/format/CStreamInterface.html for more details
2656 : * on using Arrow C stream.
2657 : *
2658 : * The method may take into account ignored fields set with SetIgnoredFields()
2659 : * (the default implementation does), and should take into account filters set
2660 : * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2661 : * specialized implementations may fallback to the default (slower)
2662 : * implementation when filters are set.
2663 : * Drivers that have a specialized implementation should
2664 : * advertise the OLCFastGetArrowStream capability.
2665 : *
2666 : * There are extra precautions to take into account in a OGR context. Unless
2667 : * otherwise specified by a particular driver implementation, the get_schema(),
2668 : * get_next() and get_last_error() function pointers of the ArrowArrayStream
2669 : * structure should no longer be used after the OGRLayer, from which the
2670 : * ArrowArrayStream structure was initialized, has been destroyed (typically at
2671 : * dataset closing). The reason is that those function pointers will typically
2672 : * point to methods of the OGRLayer instance.
2673 : * However, the ArrowSchema and ArrowArray structures filled from those
2674 : * callbacks can be used and must be released independently from the
2675 : * ArrowArrayStream or the layer.
2676 : *
2677 : * Furthermore, unless otherwise specified by a particular driver
2678 : * implementation, only one ArrowArrayStream can be active at a time on
2679 : * a given layer (that is the last active one must be explicitly released before
2680 : * a next one is asked). Changing filter state, ignored columns, modifying the
2681 : * schema or using ResetReading()/GetNextFeature() while using a
2682 : * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2683 : * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2684 : * should be called on a layer, while an ArrowArrayStream on it is active.
2685 : *
2686 : * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2687 : * get_schema() callback may be set with the potential following items:
2688 : * <ul>
2689 : * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2690 : * Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2691 : * specified.</li>
2692 : * <li>"GDAL:OGR:alternative_name": value of
2693 : * OGRFieldDefn::GetAlternativeNameRef()</li>
2694 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2695 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2696 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2697 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2698 : * string)</li>
2699 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2700 : * "true" or "false")</li>
2701 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2702 : * </ul>
2703 : *
2704 : * A potential usage can be:
2705 : \code{.cpp}
2706 : struct ArrowArrayStream stream;
2707 : if( !OGR_L_GetArrowStream(hLayer, &stream, nullptr))
2708 : {
2709 : CPLError(CE_Failure, CPLE_AppDefined,
2710 : "OGR_L_GetArrowStream() failed\n");
2711 : exit(1);
2712 : }
2713 : struct ArrowSchema schema;
2714 : if( stream.get_schema(&stream, &schema) == 0 )
2715 : {
2716 : // Do something useful
2717 : schema.release(&schema);
2718 : }
2719 : while( true )
2720 : {
2721 : struct ArrowArray array;
2722 : // Look for an error (get_next() returning a non-zero code), or
2723 : // end of iteration (array.release == nullptr)
2724 : if( stream.get_next(&stream, &array) != 0 ||
2725 : array.release == nullptr )
2726 : {
2727 : break;
2728 : }
2729 : // Do something useful
2730 : array.release(&array);
2731 : }
2732 : stream.release(&stream);
2733 : \endcode
2734 : *
2735 : * A full example is available in the
2736 : * <a
2737 : href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2738 : From OGR using the Arrow C Stream data interface</a> tutorial.
2739 : *
2740 : * Options may be driver specific. The default implementation recognizes the
2741 : * following options:
2742 : * <ul>
2743 : * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to
2744 : YES.</li>
2745 : * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2746 : * a ArrowArray batch. Defaults to 65 536.</li>
2747 : * <li>TIMEZONE="unknown", "UTC", "(+|:)HH:MM" or any other value supported by
2748 : * Arrow. (GDAL >= 3.8)
2749 : * Override the timezone flag nominally provided by
2750 : * OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2751 : * declaration, with a user specified timezone.
2752 : * Note that datetime values in Arrow arrays are always stored in UTC, and
2753 : * that the time zone flag used by GDAL to convert to UTC is the one of the
2754 : * OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2755 : * to UTC of a OGRField::Date is only done if both the timezone indicated by
2756 : * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2757 : * this TIMEZONE option) are not unknown.</li>
2758 : * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2759 : * Whether DateTime fields should be returned as a (normally ISO-8601
2760 : * formatted) string by drivers. The aim is to be able to handle mixed
2761 : * timezones (or timezone naive values) in the same column.
2762 : * All drivers must honour that option, and potentially fallback to the
2763 : * OGRLayer generic implementation if they cannot (which is the case for the
2764 : * Arrow, Parquet and ADBC drivers).
2765 : * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2766 : * </li>
2767 : * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2768 : * The default is OGC, which will lead to setting
2769 : * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2770 : * If setting GEOMETRY_METADATA_ENCODING to GEOARROW,
2771 : * ARROW:extension:name=geoarrow.wkb and
2772 : * ARROW:extension:metadata={"crs": <projjson CRS representation>} are set.
2773 : * </li>
2774 : * </ul>
2775 : *
2776 : * The Arrow/Parquet drivers recognize the following option:
2777 : * <ul>
2778 : * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2779 : * when the native geometry encoding is not WKB. Otherwise the geometry
2780 : * will be returned with its native Arrow encoding
2781 : * (possibly using GeoArrow encoding).</li>
2782 : * </ul>
2783 : *
2784 : * @param hLayer Layer
2785 : * @param out_stream Output stream. Must *not* be NULL. The content of the
2786 : * structure does not need to be initialized.
2787 : * @param papszOptions NULL terminated list of key=value options.
2788 : * @return true in case of success.
2789 : * @since GDAL 3.6
2790 : */
2791 374 : bool OGR_L_GetArrowStream(OGRLayerH hLayer, struct ArrowArrayStream *out_stream,
2792 : char **papszOptions)
2793 : {
2794 374 : VALIDATE_POINTER1(hLayer, "OGR_L_GetArrowStream", false);
2795 374 : VALIDATE_POINTER1(out_stream, "OGR_L_GetArrowStream", false);
2796 :
2797 748 : return OGRLayer::FromHandle(hLayer)->GetArrowStream(out_stream,
2798 374 : papszOptions);
2799 : }
2800 :
2801 : /************************************************************************/
2802 : /* OGRParseArrowMetadata() */
2803 : /************************************************************************/
2804 :
/** Decode a serialized Arrow key/value metadata blob into a map.
 *
 * The blob is read as: a native-endian int32 number of pairs, then for each
 * pair an int32 key length, the key bytes, an int32 value length and the
 * value bytes (no nul terminators).
 *
 * @param pabyMetadata Pointer to the blob (typically ArrowSchema::metadata).
 *                     May be NULL, in which case an empty map is returned.
 * @return the decoded key/value pairs. If a negative key or value length is
 *         met, decoding stops and the pairs decoded so far are returned.
 */
std::map<std::string, std::string>
OGRParseArrowMetadata(const char *pabyMetadata)
{
    std::map<std::string, std::string> oMetadata;
    if (pabyMetadata == nullptr)
        return oMetadata;

    // Read one int32 at the current position (memcpy as the blob is not
    // guaranteed to be aligned) and advance the cursor.
    const auto ReadInt32 = [&pabyMetadata]()
    {
        int32_t nVal;
        memcpy(&nVal, pabyMetadata, sizeof(nVal));
        pabyMetadata += sizeof(nVal);
        return nVal;
    };

    const int32_t nKVP = ReadInt32();
    for (int32_t i = 0; i < nKVP; ++i)
    {
        const int32_t nSizeKey = ReadInt32();
        if (nSizeKey < 0)
        {
            // Corrupted blob: a negative length would be turned into a huge
            // size_t by std::string. Stop here.
            break;
        }
        std::string osKey(pabyMetadata, static_cast<size_t>(nSizeKey));
        pabyMetadata += nSizeKey;

        const int32_t nSizeValue = ReadInt32();
        if (nSizeValue < 0)
            break;
        std::string osValue(pabyMetadata, static_cast<size_t>(nSizeValue));
        pabyMetadata += nSizeValue;

        oMetadata[osKey] = std::move(osValue);
    }

    return oMetadata;
}
2833 :
2834 : /************************************************************************/
2835 : /* ParseDecimalFormat() */
2836 : /************************************************************************/
2837 :
/** Parse an Arrow decimal format string.
 *
 * Recognized forms:
 * - "d:19,10"     ==> decimal128 [precision 19, scale 10]
 * - "d:19,10,NNN" ==> decimal of bit width NNN [precision 19, scale 10]
 *
 * @param format Format string. The first two characters are skipped without
 *               being checked (callers are expected to pass a decimal
 *               format).
 * @param[out] nPrecision Parsed precision (0 if it could not be parsed).
 * @param[out] nScale Parsed scale (0 if it could not be parsed).
 * @param[out] nWidthInBytes Storage width in bytes; 16 when the bit width is
 *                           omitted, 0 on failure.
 * @return true if the string is well-formed, false otherwise (missing
 *         precision/scale separator, or bit width that is not a strictly
 *         positive multiple of 8).
 */
static bool ParseDecimalFormat(const char *format, int &nPrecision, int &nScale,
                               int &nWidthInBytes)
{
    nPrecision = 0;
    nScale = 0;
    nWidthInBytes = 128 / 8;  // 128 bit

    // Make sure that skipping the 2-character prefix does not step over the
    // end of the string.
    if (format == nullptr || format[0] == '\0' || format[1] == '\0')
    {
        nWidthInBytes = 0;
        return false;
    }

    const char *pszFirstComma = strchr(format + 2, ',');
    if (!pszFirstComma)
    {
        // shouldn't happen for well-formed schemas
        nWidthInBytes = 0;
        return false;
    }

    nPrecision = atoi(format + 2);
    nScale = atoi(pszFirstComma + 1);

    const char *pszSecondComma = strchr(pszFirstComma + 1, ',');
    if (pszSecondComma)
    {
        const int nWidthInBits = atoi(pszSecondComma + 1);
        // Reject zero/negative/non-numeric widths too: they would otherwise
        // be reported as a success with a width of 0 bytes or less.
        if (nWidthInBits <= 0 || (nWidthInBits % 8) != 0)
        {
            // shouldn't happen for well-formed schemas
            nWidthInBytes = 0;
            return false;
        }
        nWidthInBytes = nWidthInBits / 8;
    }

    return true;
}
2875 :
2876 : /************************************************************************/
2877 : /* GetErrorIfUnsupportedDecimal() */
2878 : /************************************************************************/
2879 :
/** Tell whether a decimal field of the given storage width and precision is
 * supported.
 *
 * @param nWidthInBytes Storage width of the decimal, in bytes.
 * @param nPrecision Precision of the decimal.
 * @return nullptr if supported, otherwise a static error message.
 */
static const char *GetErrorIfUnsupportedDecimal(int nWidthInBytes,
                                                int nPrecision)
{
    const bool bIs128Bit = (nWidthInBytes == 128 / 8);
    const bool bIs256Bit = (nWidthInBytes == 256 / 8);
    if (!(bIs128Bit || bIs256Bit))
        return "For decimal field, only width 128 and 256 are supported";

    // precision=19 fits on 64 bits
    const bool bPrecisionOK = (nPrecision >= 1 && nPrecision <= 19);
    return bPrecisionOK
               ? nullptr
               : "For decimal field, only precision up to 19 is supported";
}
2897 :
2898 : /************************************************************************/
2899 : /* IsHandledSchema() */
2900 : /************************************************************************/
2901 :
2902 15760 : static bool IsHandledSchema(bool bTopLevel, const struct ArrowSchema *schema,
2903 : const std::string &osPrefix, bool bHasAttrQuery,
2904 : const CPLStringList &aosUsedFields)
2905 : {
2906 15760 : const char *format = schema->format;
2907 15760 : if (IsStructure(format))
2908 : {
2909 12285 : for (int64_t i = 0; i < schema->n_children; ++i)
2910 : {
2911 44156 : if (!IsHandledSchema(/* bTopLevel = */ false,
2912 11039 : schema->children[static_cast<size_t>(i)],
2913 24772 : bTopLevel ? std::string()
2914 13733 : : osPrefix + schema->name + ".",
2915 : bHasAttrQuery, aosUsedFields))
2916 : {
2917 0 : return false;
2918 : }
2919 : }
2920 1246 : return true;
2921 : }
2922 :
2923 : // Lists or maps
2924 25169 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format) ||
2925 10655 : IsMap(format))
2926 : {
2927 4566 : if (!IsHandledSchema(/* bTopLevel = */ false, schema->children[0],
2928 : osPrefix, bHasAttrQuery, aosUsedFields))
2929 : {
2930 0 : return false;
2931 : }
2932 : // For now, we can't filter on lists or maps
2933 4566 : if (aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
2934 : {
2935 0 : CPLDebug("OGR",
2936 : "Field %s has unhandled format '%s' for an "
2937 : "attribute to filter on",
2938 0 : (osPrefix + schema->name).c_str(), format);
2939 0 : return false;
2940 : }
2941 4566 : return true;
2942 : }
2943 :
2944 9948 : const char *const apszHandledFormats[] = {
2945 : "b", // boolean
2946 : "c", // int8
2947 : "C", // uint8
2948 : "s", // int16
2949 : "S", // uint16
2950 : "i", // int32
2951 : "I", // uint32
2952 : "l", // int64
2953 : "L", // uint64
2954 : "e", // float16
2955 : "f", // float32
2956 : "g", // float64,
2957 : "z", // binary
2958 : "Z", // large binary
2959 : "u", // UTF-8 string
2960 : "U", // large UTF-8 string
2961 : "tdD", // date32[days]
2962 : "tdm", // date64[milliseconds]
2963 : "tts", //time32 [seconds]
2964 : "ttm", //time32 [milliseconds]
2965 : "ttu", //time64 [microseconds]
2966 : "ttn", //time64 [nanoseconds]
2967 : };
2968 :
2969 115231 : for (const char *pszHandledFormat : apszHandledFormats)
2970 : {
2971 113923 : if (strcmp(format, pszHandledFormat) == 0)
2972 : {
2973 8640 : return true;
2974 : }
2975 : }
2976 :
2977 1308 : if (IsDecimal(format))
2978 : {
2979 790 : if (bHasAttrQuery &&
2980 790 : aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
2981 : {
2982 2 : int nPrecision = 0;
2983 2 : int nScale = 0;
2984 2 : int nWidthInBytes = 0;
2985 2 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
2986 : {
2987 0 : CPLDebug("OGR", "%s",
2988 0 : (std::string("Invalid field format ") + format +
2989 0 : " for field " + osPrefix + schema->name)
2990 : .c_str());
2991 0 : return false;
2992 : }
2993 :
2994 : const char *pszError =
2995 2 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
2996 2 : if (pszError)
2997 : {
2998 0 : CPLDebug("OGR", "%s", pszError);
2999 0 : return false;
3000 : }
3001 : }
3002 412 : return true;
3003 : }
3004 :
3005 896 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
3006 : {
3007 896 : return true;
3008 : }
3009 :
3010 0 : CPLDebug("OGR", "Field %s has unhandled format '%s'",
3011 0 : (osPrefix + schema->name).c_str(), format);
3012 0 : return false;
3013 : }
3014 :
3015 : /************************************************************************/
3016 : /* OGRLayer::CanPostFilterArrowArray() */
3017 : /************************************************************************/
3018 :
3019 : /** Whether the PostFilterArrowArray() can work on the schema to remove
3020 : * rows that aren't selected by the spatial or attribute filter.
3021 : */
3022 155 : bool OGRLayer::CanPostFilterArrowArray(const struct ArrowSchema *schema) const
3023 : {
3024 155 : if (!IsHandledSchema(
3025 155 : /* bTopLevel=*/true, schema, std::string(),
3026 155 : m_poAttrQuery != nullptr,
3027 310 : m_poAttrQuery ? CPLStringList(m_poAttrQuery->GetUsedFields())
3028 : : CPLStringList()))
3029 : {
3030 0 : return false;
3031 : }
3032 :
3033 155 : if (m_poFilterGeom)
3034 : {
3035 22 : bool bFound = false;
3036 : const char *pszGeomFieldName =
3037 : const_cast<OGRLayer *>(this)
3038 22 : ->GetLayerDefn()
3039 22 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
3040 22 : ->GetNameRef();
3041 839 : for (int64_t i = 0; i < schema->n_children; ++i)
3042 : {
3043 839 : const auto fieldSchema = schema->children[i];
3044 839 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
3045 : {
3046 23 : if (!IsBinary(fieldSchema->format) &&
3047 1 : !IsLargeBinary(fieldSchema->format))
3048 : {
3049 1 : CPLDebug("OGR", "Geometry field %s has handled format '%s'",
3050 : fieldSchema->name, fieldSchema->format);
3051 1 : return false;
3052 : }
3053 :
3054 : // Check if ARROW:extension:name = ogc.wkb
3055 21 : const char *pabyMetadata = fieldSchema->metadata;
3056 21 : if (!pabyMetadata)
3057 : {
3058 0 : CPLDebug(
3059 : "OGR",
3060 : "Geometry field %s lacks metadata in its schema field",
3061 : fieldSchema->name);
3062 0 : return false;
3063 : }
3064 :
3065 21 : const auto oMetadata = OGRParseArrowMetadata(pabyMetadata);
3066 21 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
3067 21 : if (oIter == oMetadata.end())
3068 : {
3069 0 : CPLDebug("OGR",
3070 : "Geometry field %s lacks "
3071 : "%s metadata "
3072 : "in its schema field",
3073 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY);
3074 0 : return false;
3075 : }
3076 21 : if (oIter->second != EXTENSION_NAME_OGC_WKB &&
3077 0 : oIter->second != EXTENSION_NAME_GEOARROW_WKB)
3078 : {
3079 0 : CPLDebug("OGR",
3080 : "Geometry field %s has unexpected "
3081 : "%s = '%s' metadata "
3082 : "in its schema field",
3083 : fieldSchema->name, ARROW_EXTENSION_NAME_KEY,
3084 0 : oIter->second.c_str());
3085 0 : return false;
3086 : }
3087 :
3088 21 : bFound = true;
3089 21 : break;
3090 : }
3091 : }
3092 21 : if (!bFound)
3093 : {
3094 0 : CPLDebug("OGR", "Cannot find geometry field %s in schema",
3095 : pszGeomFieldName);
3096 0 : return false;
3097 : }
3098 : }
3099 :
3100 154 : return true;
3101 : }
3102 :
#if 0
/************************************************************************/
/*                       CheckValidityBuffer()                          */
/************************************************************************/

/** Debugging helper (currently compiled out): assert that array->null_count
 * matches the number of unset bits of the validity buffer.
 */
static void CheckValidityBuffer(const struct ArrowArray *array)
{
    // A negative null_count is not checked.
    if (array->null_count < 0)
        return;

    const uint8_t *pabyValidity =
        static_cast<const uint8_t *>(const_cast<const void *>(array->buffers[0]));
    if (pabyValidity == nullptr)
    {
        // Without a validity buffer, no null is expected.
        CPLAssert(array->null_count == 0);
        return;
    }

    const size_t nFirstBit = static_cast<size_t>(array->offset);
    const size_t nBitCount = static_cast<size_t>(array->length);
    size_t nNullsFound = 0;
    for (size_t iBit = 0; iBit < nBitCount; ++iBit)
    {
        if (!TestBit(pabyValidity, nFirstBit + iBit))
            ++nNullsFound;
    }
    CPLAssert(static_cast<size_t>(array->null_count) == nNullsFound);
}
#endif
3129 :
3130 : /************************************************************************/
3131 : /* CompactValidityBuffer() */
3132 : /************************************************************************/
3133 :
3134 7610 : static void CompactValidityBuffer(
3135 : const struct ArrowSchema *, struct ArrowArray *array, size_t iStart,
3136 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3137 : {
3138 : // Invalidate null_count as the same validity buffer may be used when
3139 : // scrolling batches, and this creates confusion if we try to set it
3140 : // to different values among the batches
3141 7610 : if (array->null_count <= 0)
3142 : {
3143 4186 : array->null_count = -1;
3144 4186 : return;
3145 : }
3146 3424 : array->null_count = -1;
3147 :
3148 3424 : CPLAssert(static_cast<size_t>(array->length) >=
3149 : iStart + abyValidityFromFilters.size());
3150 3424 : uint8_t *pabyValidity =
3151 3424 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[0]));
3152 3424 : const size_t nLength = abyValidityFromFilters.size();
3153 3424 : const size_t nOffset = static_cast<size_t>(array->offset);
3154 3424 : size_t j = iStart + nOffset;
3155 12563 : for (size_t i = 0; i < nLength && j < nNewLength + nOffset; ++i)
3156 : {
3157 9139 : if (abyValidityFromFilters[i])
3158 : {
3159 5663 : if (TestBit(pabyValidity, i + iStart + nOffset))
3160 4307 : SetBit(pabyValidity, j);
3161 : else
3162 1356 : UnsetBit(pabyValidity, j);
3163 5663 : ++j;
3164 : }
3165 : }
3166 : }
3167 :
3168 : /************************************************************************/
3169 : /* CompactBoolArray() */
3170 : /************************************************************************/
3171 :
3172 224 : static void CompactBoolArray(const struct ArrowSchema *schema,
3173 : struct ArrowArray *array, size_t iStart,
3174 : const std::vector<bool> &abyValidityFromFilters,
3175 : size_t nNewLength)
3176 : {
3177 224 : CPLAssert(array->n_children == 0);
3178 224 : CPLAssert(array->n_buffers == 2);
3179 224 : CPLAssert(static_cast<size_t>(array->length) >=
3180 : iStart + abyValidityFromFilters.size());
3181 :
3182 224 : const size_t nLength = abyValidityFromFilters.size();
3183 224 : const size_t nOffset = static_cast<size_t>(array->offset);
3184 224 : uint8_t *pabyData =
3185 224 : static_cast<uint8_t *>(const_cast<void *>(array->buffers[1]));
3186 224 : size_t j = iStart + nOffset;
3187 1147 : for (size_t i = 0; i < nLength; ++i)
3188 : {
3189 923 : if (abyValidityFromFilters[i])
3190 : {
3191 424 : if (TestBit(pabyData, i + iStart + nOffset))
3192 199 : SetBit(pabyData, j);
3193 : else
3194 225 : UnsetBit(pabyData, j);
3195 :
3196 424 : ++j;
3197 : }
3198 : }
3199 :
3200 224 : if (schema->flags & ARROW_FLAG_NULLABLE)
3201 224 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3202 : nNewLength);
3203 :
3204 224 : array->length = nNewLength;
3205 224 : }
3206 :
3207 : /************************************************************************/
3208 : /* CompactPrimitiveArray() */
3209 : /************************************************************************/
3210 :
3211 : template <class T>
3212 3503 : static void CompactPrimitiveArray(
3213 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3214 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3215 : {
3216 3503 : CPLAssert(array->n_children == 0);
3217 3503 : CPLAssert(array->n_buffers == 2);
3218 3503 : CPLAssert(static_cast<size_t>(array->length) >=
3219 : iStart + abyValidityFromFilters.size());
3220 :
3221 3503 : const size_t nLength = abyValidityFromFilters.size();
3222 3503 : const size_t nOffset = static_cast<size_t>(array->offset);
3223 3503 : T *paData =
3224 3503 : static_cast<T *>(const_cast<void *>(array->buffers[1])) + nOffset;
3225 3503 : size_t j = iStart;
3226 17682 : for (size_t i = 0; i < nLength; ++i)
3227 : {
3228 14179 : if (abyValidityFromFilters[i])
3229 : {
3230 6206 : paData[j] = paData[i + iStart];
3231 6206 : ++j;
3232 : }
3233 : }
3234 :
3235 3503 : if (schema->flags & ARROW_FLAG_NULLABLE)
3236 3492 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3237 : nNewLength);
3238 :
3239 3503 : array->length = nNewLength;
3240 3503 : }
3241 :
3242 : /************************************************************************/
3243 : /* CompactStringOrBinaryArray() */
3244 : /************************************************************************/
3245 :
3246 : template <class OffsetType>
3247 1187 : static void CompactStringOrBinaryArray(
3248 : const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3249 : const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3250 : {
3251 1187 : CPLAssert(array->n_children == 0);
3252 1187 : CPLAssert(array->n_buffers == 3);
3253 1187 : CPLAssert(static_cast<size_t>(array->length) >=
3254 : iStart + abyValidityFromFilters.size());
3255 :
3256 1187 : const size_t nLength = abyValidityFromFilters.size();
3257 1187 : const size_t nOffset = static_cast<size_t>(array->offset);
3258 1187 : OffsetType *panOffsets =
3259 1187 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3260 : nOffset;
3261 1187 : GByte *pabyData =
3262 1187 : static_cast<GByte *>(const_cast<void *>(array->buffers[2]));
3263 1187 : size_t j = iStart;
3264 1187 : OffsetType nCurOffset = panOffsets[iStart];
3265 5103 : for (size_t i = 0; i < nLength; ++i)
3266 : {
3267 3916 : if (abyValidityFromFilters[i])
3268 : {
3269 1768 : const auto nStartOffset = panOffsets[i + iStart];
3270 1768 : const auto nEndOffset = panOffsets[i + iStart + 1];
3271 1768 : panOffsets[j] = nCurOffset;
3272 1768 : const auto nSize = static_cast<size_t>(nEndOffset - nStartOffset);
3273 1768 : if (nSize)
3274 : {
3275 1562 : if (nCurOffset < nStartOffset)
3276 : {
3277 636 : memmove(pabyData + nCurOffset, pabyData + nStartOffset,
3278 : nSize);
3279 : }
3280 1562 : nCurOffset += static_cast<OffsetType>(nSize);
3281 : }
3282 1768 : ++j;
3283 : }
3284 : }
3285 1187 : panOffsets[j] = nCurOffset;
3286 :
3287 1187 : if (schema->flags & ARROW_FLAG_NULLABLE)
3288 806 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3289 : nNewLength);
3290 :
3291 1187 : array->length = nNewLength;
3292 1187 : }
3293 :
3294 : /************************************************************************/
3295 : /* CompactFixedWidthArray() */
3296 : /************************************************************************/
3297 :
3298 : static void
3299 305 : CompactFixedWidthArray(const struct ArrowSchema *schema,
3300 : struct ArrowArray *array, int nWidth, size_t iStart,
3301 : const std::vector<bool> &abyValidityFromFilters,
3302 : size_t nNewLength)
3303 : {
3304 305 : CPLAssert(array->n_children == 0);
3305 305 : CPLAssert(array->n_buffers == 2);
3306 305 : CPLAssert(static_cast<size_t>(array->length) >=
3307 : iStart + abyValidityFromFilters.size());
3308 :
3309 305 : const size_t nLength = abyValidityFromFilters.size();
3310 305 : const size_t nOffset = static_cast<size_t>(array->offset);
3311 305 : GByte *pabyData =
3312 305 : static_cast<GByte *>(const_cast<void *>(array->buffers[1]));
3313 305 : size_t nStartOffset = (iStart + nOffset) * nWidth;
3314 305 : size_t nCurOffset = nStartOffset;
3315 1133 : for (size_t i = 0; i < nLength; ++i, nStartOffset += nWidth)
3316 : {
3317 828 : if (abyValidityFromFilters[i])
3318 : {
3319 391 : if (nCurOffset < nStartOffset)
3320 : {
3321 210 : memcpy(pabyData + nCurOffset, pabyData + nStartOffset, nWidth);
3322 : }
3323 391 : nCurOffset += nWidth;
3324 : }
3325 : }
3326 :
3327 305 : if (schema->flags & ARROW_FLAG_NULLABLE)
3328 305 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3329 : nNewLength);
3330 :
3331 305 : array->length = nNewLength;
3332 305 : }
3333 :
3334 : /************************************************************************/
3335 : /* CompactStructArray() */
3336 : /************************************************************************/
3337 :
3338 : static bool CompactArray(const struct ArrowSchema *schema,
3339 : struct ArrowArray *array, size_t iStart,
3340 : const std::vector<bool> &abyValidityFromFilters,
3341 : size_t nNewLength);
3342 :
3343 665 : static bool CompactStructArray(const struct ArrowSchema *schema,
3344 : struct ArrowArray *array, size_t iStart,
3345 : const std::vector<bool> &abyValidityFromFilters,
3346 : size_t nNewLength)
3347 : {
3348 : // The equality might not be strict in the case of when some sub-arrays
3349 : // are fully void !
3350 665 : CPLAssert(array->n_children <= schema->n_children);
3351 6646 : for (int64_t iField = 0; iField < array->n_children; ++iField)
3352 : {
3353 5981 : const auto psChildSchema = schema->children[iField];
3354 5981 : const auto psChildArray = array->children[iField];
3355 : // To please Arrow validation...
3356 5981 : const size_t nChildNewLength =
3357 5981 : static_cast<size_t>(array->offset) + nNewLength;
3358 5981 : if (psChildArray->length > array->length)
3359 : {
3360 120 : std::vector<bool> abyChildValidity(abyValidityFromFilters);
3361 120 : abyChildValidity.resize(
3362 120 : abyValidityFromFilters.size() +
3363 120 : static_cast<size_t>(psChildArray->length - array->length),
3364 : false);
3365 120 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3366 : abyChildValidity, nChildNewLength))
3367 : {
3368 0 : return false;
3369 : }
3370 : }
3371 : else
3372 : {
3373 5861 : if (!CompactArray(psChildSchema, psChildArray, iStart,
3374 : abyValidityFromFilters, nChildNewLength))
3375 : {
3376 0 : return false;
3377 : }
3378 : }
3379 5981 : CPLAssert(psChildArray->length ==
3380 : static_cast<int64_t>(nChildNewLength));
3381 : }
3382 :
3383 665 : if (schema->flags & ARROW_FLAG_NULLABLE)
3384 201 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3385 : nNewLength);
3386 :
3387 665 : array->length = nNewLength;
3388 :
3389 665 : return true;
3390 : }
3391 :
3392 : /************************************************************************/
3393 : /* InvalidateNullCountRec() */
3394 : /************************************************************************/
3395 :
3396 570 : static void InvalidateNullCountRec(const struct ArrowSchema *schema,
3397 : struct ArrowArray *array)
3398 : {
3399 570 : if (schema->flags & ARROW_FLAG_NULLABLE)
3400 210 : array->null_count = -1;
3401 960 : for (int i = 0; i < array->n_children; ++i)
3402 390 : InvalidateNullCountRec(schema->children[i], array->children[i]);
3403 570 : }
3404 :
3405 : /************************************************************************/
3406 : /* CompactListArray() */
3407 : /************************************************************************/
3408 :
3409 : template <class OffsetType>
3410 1773 : static bool CompactListArray(const struct ArrowSchema *schema,
3411 : struct ArrowArray *array, size_t iStart,
3412 : const std::vector<bool> &abyValidityFromFilters,
3413 : size_t nNewLength)
3414 : {
3415 1773 : CPLAssert(static_cast<size_t>(array->length) >=
3416 : iStart + abyValidityFromFilters.size());
3417 1773 : CPLAssert(array->n_children == 1);
3418 1773 : CPLAssert(array->n_buffers == 2);
3419 :
3420 1773 : const auto psChildSchema = schema->children[0];
3421 1773 : const auto psChildArray = array->children[0];
3422 :
3423 1773 : const size_t nLength = abyValidityFromFilters.size();
3424 1773 : const size_t nOffset = static_cast<size_t>(array->offset);
3425 1773 : OffsetType *panOffsets =
3426 1773 : static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3427 : nOffset;
3428 :
3429 1773 : if (panOffsets[iStart + nLength] > panOffsets[iStart])
3430 : {
3431 3186 : std::vector<bool> abyChildValidity(
3432 1593 : static_cast<size_t>(panOffsets[iStart + nLength] -
3433 1593 : panOffsets[iStart]),
3434 : true);
3435 1593 : size_t j = iStart;
3436 1593 : OffsetType nCurOffset = panOffsets[iStart];
3437 6694 : for (size_t i = 0; i < nLength; ++i)
3438 : {
3439 5101 : if (abyValidityFromFilters[i])
3440 : {
3441 2142 : const auto nSize =
3442 2142 : panOffsets[i + iStart + 1] - panOffsets[i + iStart];
3443 2142 : panOffsets[j] = nCurOffset;
3444 2142 : nCurOffset += nSize;
3445 2142 : ++j;
3446 : }
3447 : else
3448 : {
3449 2959 : const auto nStartOffset = panOffsets[i + iStart];
3450 2959 : const auto nEndOffset = panOffsets[i + iStart + 1];
3451 2959 : if (nStartOffset != nEndOffset)
3452 : {
3453 3073 : if (nStartOffset >=
3454 1538 : panOffsets[iStart] + abyChildValidity.size())
3455 : {
3456 : // shouldn't happen in sane arrays...
3457 0 : CPLError(CE_Failure, CPLE_AppDefined,
3458 : "nStartOffset >= panOffsets[iStart] + "
3459 : "abyChildValidity.size()");
3460 0 : return false;
3461 : }
3462 : // nEndOffset might be equal to abyChildValidity.size()
3463 3073 : if (nEndOffset >
3464 1538 : panOffsets[iStart] + abyChildValidity.size())
3465 : {
3466 : // shouldn't happen in sane arrays...
3467 0 : CPLError(CE_Failure, CPLE_AppDefined,
3468 : "nEndOffset > panOffsets[iStart] + "
3469 : "abyChildValidity.size()");
3470 0 : return false;
3471 : }
3472 1538 : for (auto k = nStartOffset - panOffsets[iStart];
3473 4652 : k < nEndOffset - panOffsets[iStart]; ++k)
3474 3114 : abyChildValidity[static_cast<size_t>(k)] = false;
3475 : }
3476 : }
3477 : }
3478 1593 : panOffsets[j] = nCurOffset;
3479 1593 : const size_t nChildNewLength = static_cast<size_t>(panOffsets[j]);
3480 : // To please Arrow validation
3481 4552 : for (; j < iStart + nLength; ++j)
3482 2959 : panOffsets[j] = nCurOffset;
3483 :
3484 1593 : if (!CompactArray(psChildSchema, psChildArray,
3485 1593 : static_cast<size_t>(panOffsets[iStart]),
3486 : abyChildValidity, nChildNewLength))
3487 0 : return false;
3488 :
3489 1593 : CPLAssert(psChildArray->length ==
3490 : static_cast<int64_t>(nChildNewLength));
3491 : }
3492 : else
3493 : {
3494 180 : InvalidateNullCountRec(psChildSchema, psChildArray);
3495 : }
3496 :
3497 1773 : if (schema->flags & ARROW_FLAG_NULLABLE)
3498 1773 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3499 : nNewLength);
3500 :
3501 1773 : array->length = nNewLength;
3502 :
3503 1773 : return true;
3504 : }
3505 :
3506 : /************************************************************************/
3507 : /* CompactFixedSizeListArray() */
3508 : /************************************************************************/
3509 :
3510 : static bool
3511 809 : CompactFixedSizeListArray(const struct ArrowSchema *schema,
3512 : struct ArrowArray *array, size_t N, size_t iStart,
3513 : const std::vector<bool> &abyValidityFromFilters,
3514 : size_t nNewLength)
3515 : {
3516 809 : CPLAssert(static_cast<size_t>(array->length) >=
3517 : iStart + abyValidityFromFilters.size());
3518 809 : CPLAssert(array->n_children == 1);
3519 :
3520 809 : const auto psChildSchema = schema->children[0];
3521 809 : const auto psChildArray = array->children[0];
3522 :
3523 809 : const size_t nLength = abyValidityFromFilters.size();
3524 809 : const size_t nOffset = static_cast<size_t>(array->offset);
3525 1618 : std::vector<bool> abyChildValidity(N * nLength, true);
3526 809 : size_t nChildNewLength = (iStart + nOffset) * N;
3527 809 : size_t nSrcLength = 0;
3528 3198 : for (size_t i = 0; i < nLength; ++i)
3529 : {
3530 2389 : if (abyValidityFromFilters[i])
3531 : {
3532 1015 : nChildNewLength += N;
3533 1015 : nSrcLength++;
3534 : }
3535 : else
3536 : {
3537 1374 : const size_t nStartOffset = i * N;
3538 1374 : const size_t nEndOffset = (i + 1) * N;
3539 4122 : for (size_t k = nStartOffset; k < nEndOffset; ++k)
3540 2748 : abyChildValidity[k] = false;
3541 : }
3542 : }
3543 809 : CPL_IGNORE_RET_VAL(nSrcLength);
3544 809 : CPLAssert(iStart + nSrcLength == nNewLength);
3545 :
3546 809 : if (!CompactArray(psChildSchema, psChildArray, (iStart + nOffset) * N,
3547 : abyChildValidity, nChildNewLength))
3548 0 : return false;
3549 :
3550 809 : if (schema->flags & ARROW_FLAG_NULLABLE)
3551 809 : CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3552 : nNewLength);
3553 :
3554 809 : array->length = nNewLength;
3555 :
3556 809 : CPLAssert(psChildArray->length >=
3557 : static_cast<int64_t>(N) * (array->length + array->offset));
3558 :
3559 809 : return true;
3560 : }
3561 :
3562 : /************************************************************************/
3563 : /* CompactMapArray() */
3564 : /************************************************************************/
3565 :
3566 561 : static bool CompactMapArray(const struct ArrowSchema *schema,
3567 : struct ArrowArray *array, size_t iStart,
3568 : const std::vector<bool> &abyValidityFromFilters,
3569 : size_t nNewLength)
3570 : {
3571 561 : return CompactListArray<uint32_t>(schema, array, iStart,
3572 561 : abyValidityFromFilters, nNewLength);
3573 : }
3574 :
3575 : /************************************************************************/
3576 : /* CompactArray() */
3577 : /************************************************************************/
3578 :
3579 8383 : static bool CompactArray(const struct ArrowSchema *schema,
3580 : struct ArrowArray *array, size_t iStart,
3581 : const std::vector<bool> &abyValidityFromFilters,
3582 : size_t nNewLength)
3583 : {
3584 8383 : const char *format = schema->format;
3585 :
3586 8383 : if (IsStructure(format))
3587 : {
3588 582 : if (!CompactStructArray(schema, array, iStart, abyValidityFromFilters,
3589 : nNewLength))
3590 0 : return false;
3591 : }
3592 7801 : else if (IsList(format))
3593 : {
3594 1209 : if (!CompactListArray<uint32_t>(schema, array, iStart,
3595 : abyValidityFromFilters, nNewLength))
3596 0 : return false;
3597 : }
3598 6592 : else if (IsLargeList(format))
3599 : {
3600 3 : if (!CompactListArray<uint64_t>(schema, array, iStart,
3601 : abyValidityFromFilters, nNewLength))
3602 0 : return false;
3603 : }
3604 6589 : else if (IsMap(format))
3605 : {
3606 561 : if (!CompactMapArray(schema, array, iStart, abyValidityFromFilters,
3607 : nNewLength))
3608 0 : return false;
3609 : }
3610 6028 : else if (IsFixedSizeList(format))
3611 : {
3612 809 : const int N = GetFixedSizeList(format);
3613 809 : if (N <= 0)
3614 0 : return false;
3615 809 : if (!CompactFixedSizeListArray(schema, array, static_cast<size_t>(N),
3616 : iStart, abyValidityFromFilters,
3617 : nNewLength))
3618 0 : return false;
3619 : }
3620 5219 : else if (IsBoolean(format))
3621 : {
3622 224 : CompactBoolArray(schema, array, iStart, abyValidityFromFilters,
3623 : nNewLength);
3624 : }
3625 4995 : else if (IsInt8(format) || IsUInt8(format))
3626 : {
3627 444 : CompactPrimitiveArray<uint8_t>(schema, array, iStart,
3628 : abyValidityFromFilters, nNewLength);
3629 : }
3630 4551 : else if (IsInt16(format) || IsUInt16(format) || IsFloat16(format))
3631 : {
3632 458 : CompactPrimitiveArray<uint16_t>(schema, array, iStart,
3633 : abyValidityFromFilters, nNewLength);
3634 : }
3635 7893 : else if (IsInt32(format) || IsUInt32(format) || IsFloat32(format) ||
3636 11323 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
3637 3430 : strcmp(format, "ttm") == 0)
3638 : {
3639 794 : CompactPrimitiveArray<uint32_t>(schema, array, iStart,
3640 : abyValidityFromFilters, nNewLength);
3641 : }
3642 5879 : else if (IsInt64(format) || IsUInt64(format) || IsFloat64(format) ||
3643 1997 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
3644 5879 : strcmp(format, "ttn") == 0 || strncmp(format, "ts", 2) == 0)
3645 : {
3646 1807 : CompactPrimitiveArray<uint64_t>(schema, array, iStart,
3647 : abyValidityFromFilters, nNewLength);
3648 : }
3649 1492 : else if (IsString(format) || IsBinary(format))
3650 : {
3651 983 : CompactStringOrBinaryArray<uint32_t>(
3652 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3653 : }
3654 509 : else if (IsLargeString(format) || IsLargeBinary(format))
3655 : {
3656 204 : CompactStringOrBinaryArray<uint64_t>(
3657 : schema, array, iStart, abyValidityFromFilters, nNewLength);
3658 : }
3659 305 : else if (IsFixedWidthBinary(format))
3660 : {
3661 67 : const int nWidth = GetFixedWithBinary(format);
3662 67 : CompactFixedWidthArray(schema, array, nWidth, iStart,
3663 : abyValidityFromFilters, nNewLength);
3664 : }
3665 238 : else if (IsDecimal(format))
3666 : {
3667 238 : int nPrecision = 0;
3668 238 : int nScale = 0;
3669 238 : int nWidthInBytes = 0;
3670 238 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
3671 : {
3672 0 : CPLError(CE_Failure, CPLE_AppDefined,
3673 : "Unexpected error in PostFilterArrowArray(): unhandled "
3674 : "field format: %s",
3675 : format);
3676 :
3677 0 : return false;
3678 : }
3679 238 : CompactFixedWidthArray(schema, array, nWidthInBytes, iStart,
3680 : abyValidityFromFilters, nNewLength);
3681 : }
3682 : else
3683 : {
3684 0 : CPLError(CE_Failure, CPLE_AppDefined,
3685 : "Unexpected error in CompactArray(): unhandled "
3686 : "field format: %s",
3687 : format);
3688 0 : return false;
3689 : }
3690 :
3691 8383 : return true;
3692 : }
3693 :
3694 : /************************************************************************/
3695 : /* FillValidityArrayFromWKBArray() */
3696 : /************************************************************************/
3697 :
3698 : template <class OffsetType>
3699 : static size_t
3700 21 : FillValidityArrayFromWKBArray(struct ArrowArray *array, const OGRLayer *poLayer,
3701 : std::vector<bool> &abyValidityFromFilters)
3702 : {
3703 21 : const size_t nLength = static_cast<size_t>(array->length);
3704 14 : const uint8_t *pabyValidity =
3705 21 : array->null_count == 0
3706 : ? nullptr
3707 7 : : static_cast<const uint8_t *>(array->buffers[0]);
3708 21 : const size_t nOffset = static_cast<size_t>(array->offset);
3709 21 : const OffsetType *panOffsets =
3710 21 : static_cast<const OffsetType *>(array->buffers[1]) + nOffset;
3711 21 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
3712 21 : OGREnvelope sEnvelope;
3713 21 : abyValidityFromFilters.resize(nLength);
3714 21 : size_t nCountIntersecting = 0;
3715 138 : for (size_t i = 0; i < nLength; ++i)
3716 : {
3717 117 : if (!pabyValidity || TestBit(pabyValidity, i + nOffset))
3718 : {
3719 110 : const GByte *pabyWKB = pabyData + panOffsets[i];
3720 110 : const size_t nWKBSize =
3721 110 : static_cast<size_t>(panOffsets[i + 1] - panOffsets[i]);
3722 110 : if (poLayer->FilterWKBGeometry(pabyWKB, nWKBSize,
3723 : /* bEnvelopeAlreadySet=*/false,
3724 : sEnvelope))
3725 : {
3726 29 : abyValidityFromFilters[i] = true;
3727 29 : nCountIntersecting++;
3728 : }
3729 : }
3730 : }
3731 21 : return nCountIntersecting;
3732 : }
3733 :
3734 : /************************************************************************/
3735 : /* ArrowTimestampToOGRDateTime() */
3736 : /************************************************************************/
3737 :
3738 107 : static void ArrowTimestampToOGRDateTime(int64_t nTimestamp,
3739 : int nInvFactorToSecond,
3740 : const char *pszTZ, OGRFeature &oFeature,
3741 : int iField)
3742 : {
3743 107 : double floatingPart = 0;
3744 107 : if (nInvFactorToSecond)
3745 : {
3746 107 : floatingPart =
3747 107 : (nTimestamp % nInvFactorToSecond) / double(nInvFactorToSecond);
3748 107 : nTimestamp /= nInvFactorToSecond;
3749 : }
3750 107 : int nTZFlag = 0;
3751 107 : const size_t nTZLen = strlen(pszTZ);
3752 107 : if ((nTZLen == 3 && strcmp(pszTZ, "UTC") == 0) ||
3753 0 : (nTZLen == 7 && strcmp(pszTZ, "Etc/UTC") == 0))
3754 : {
3755 17 : nTZFlag = 100;
3756 : }
3757 90 : else if (nTZLen == 6 && (pszTZ[0] == '+' || pszTZ[0] == '-') &&
3758 33 : pszTZ[3] == ':')
3759 : {
3760 33 : int nTZHour = atoi(pszTZ + 1);
3761 33 : int nTZMin = atoi(pszTZ + 4);
3762 33 : if (nTZHour >= 0 && nTZHour <= 14 && nTZMin >= 0 && nTZMin < 60 &&
3763 33 : (nTZMin % 15) == 0)
3764 : {
3765 33 : nTZFlag = (nTZHour * 4) + (nTZMin / 15);
3766 33 : if (pszTZ[0] == '+')
3767 : {
3768 24 : nTZFlag = 100 + nTZFlag;
3769 24 : nTimestamp += nTZHour * 3600 + nTZMin * 60;
3770 : }
3771 : else
3772 : {
3773 9 : nTZFlag = 100 - nTZFlag;
3774 9 : nTimestamp -= nTZHour * 3600 + nTZMin * 60;
3775 : }
3776 : }
3777 : }
3778 : struct tm dt;
3779 107 : CPLUnixTimeToYMDHMS(nTimestamp, &dt);
3780 107 : oFeature.SetField(iField, dt.tm_year + 1900, dt.tm_mon + 1, dt.tm_mday,
3781 : dt.tm_hour, dt.tm_min,
3782 107 : static_cast<float>(dt.tm_sec + floatingPart), nTZFlag);
3783 107 : }
3784 :
3785 : /************************************************************************/
3786 : /* BuildMapFieldNameToArrowPath() */
3787 : /************************************************************************/
3788 :
3789 : static void
3790 334 : BuildMapFieldNameToArrowPath(const struct ArrowSchema *schema,
3791 : std::map<std::string, std::vector<int>> &oMap,
3792 : const std::string &osPrefix,
3793 : std::vector<int> &anArrowPath)
3794 : {
3795 7833 : for (int64_t i = 0; i < schema->n_children; ++i)
3796 : {
3797 7499 : auto psChild = schema->children[i];
3798 7499 : anArrowPath.push_back(static_cast<int>(i));
3799 7499 : if (IsStructure(psChild->format))
3800 : {
3801 400 : std::string osNewPrefix(osPrefix);
3802 200 : osNewPrefix += psChild->name;
3803 200 : osNewPrefix += ".";
3804 200 : BuildMapFieldNameToArrowPath(psChild, oMap, osNewPrefix,
3805 : anArrowPath);
3806 : }
3807 : else
3808 : {
3809 7299 : oMap[osPrefix + psChild->name] = anArrowPath;
3810 : }
3811 7499 : anArrowPath.pop_back();
3812 : }
3813 334 : }
3814 :
3815 : /************************************************************************/
3816 : /* FillFieldList() */
3817 : /************************************************************************/
3818 :
3819 : template <typename ListOffsetType, typename ArrowType,
3820 : typename OGRType = ArrowType>
3821 167 : inline static void FillFieldList(const struct ArrowArray *array,
3822 : int iOGRFieldIdx, size_t nOffsettedIndex,
3823 : const struct ArrowArray *childArray,
3824 : OGRFeature &oFeature)
3825 : {
3826 167 : const auto panOffsets =
3827 167 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3828 : nOffsettedIndex;
3829 334 : std::vector<OGRType> aValues;
3830 167 : const auto *paValues =
3831 167 : static_cast<const ArrowType *>(childArray->buffers[1]);
3832 167 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3833 509 : i < static_cast<size_t>(panOffsets[1]); ++i)
3834 : {
3835 342 : aValues.push_back(static_cast<OGRType>(paValues[i]));
3836 : }
3837 167 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3838 : aValues.data());
3839 167 : }
3840 :
3841 : /************************************************************************/
3842 : /* FillFieldListFromBool() */
3843 : /************************************************************************/
3844 :
3845 : template <typename ListOffsetType>
3846 : inline static void
3847 16 : FillFieldListFromBool(const struct ArrowArray *array, int iOGRFieldIdx,
3848 : size_t nOffsettedIndex,
3849 : const struct ArrowArray *childArray, OGRFeature &oFeature)
3850 : {
3851 16 : const auto panOffsets =
3852 16 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3853 : nOffsettedIndex;
3854 32 : std::vector<int> aValues;
3855 16 : const auto *paValues = static_cast<const uint8_t *>(childArray->buffers[1]);
3856 16 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3857 48 : i < static_cast<size_t>(panOffsets[1]); ++i)
3858 : {
3859 32 : aValues.push_back(TestBit(paValues, i) ? 1 : 0);
3860 : }
3861 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3862 16 : aValues.data());
3863 16 : }
3864 :
3865 : /************************************************************************/
3866 : /* FillFieldListFromHalfFloat() */
3867 : /************************************************************************/
3868 :
3869 : template <typename ListOffsetType>
3870 8 : inline static void FillFieldListFromHalfFloat(
3871 : const struct ArrowArray *array, int iOGRFieldIdx, size_t nOffsettedIndex,
3872 : const struct ArrowArray *childArray, OGRFeature &oFeature)
3873 : {
3874 8 : const auto panOffsets =
3875 8 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3876 : nOffsettedIndex;
3877 16 : std::vector<double> aValues;
3878 8 : const auto *paValues =
3879 8 : static_cast<const uint16_t *>(childArray->buffers[1]);
3880 8 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3881 24 : i < static_cast<size_t>(panOffsets[1]); ++i)
3882 : {
3883 16 : const auto nFloat16AsUInt32 = CPLHalfToFloat(paValues[i]);
3884 : float f;
3885 16 : memcpy(&f, &nFloat16AsUInt32, sizeof(f));
3886 16 : aValues.push_back(static_cast<double>(f));
3887 : }
3888 8 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3889 8 : aValues.data());
3890 8 : }
3891 :
3892 : /************************************************************************/
3893 : /* FillFieldListFromString() */
3894 : /************************************************************************/
3895 :
3896 : template <typename ListOffsetType, typename StringOffsetType>
3897 32 : inline static void FillFieldListFromString(const struct ArrowArray *array,
3898 : int iOGRFieldIdx,
3899 : size_t nOffsettedIndex,
3900 : const struct ArrowArray *childArray,
3901 : OGRFeature &oFeature)
3902 : {
3903 32 : const auto panOffsets =
3904 32 : static_cast<const ListOffsetType *>(array->buffers[1]) +
3905 : nOffsettedIndex;
3906 64 : CPLStringList aosVals;
3907 32 : const auto panSubOffsets =
3908 32 : static_cast<const StringOffsetType *>(childArray->buffers[1]);
3909 32 : const char *pszValues = static_cast<const char *>(childArray->buffers[2]);
3910 64 : std::string osTmp;
3911 90 : for (size_t i = static_cast<size_t>(panOffsets[0]);
3912 90 : i < static_cast<size_t>(panOffsets[1]); ++i)
3913 : {
3914 58 : osTmp.assign(
3915 58 : pszValues + panSubOffsets[i],
3916 58 : static_cast<size_t>(panSubOffsets[i + 1] - panSubOffsets[i]));
3917 58 : aosVals.AddString(osTmp.c_str());
3918 : }
3919 32 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
3920 32 : }
3921 :
3922 : /************************************************************************/
3923 : /* FillFieldFixedSizeList() */
3924 : /************************************************************************/
3925 :
3926 : template <typename ArrowType, typename OGRType = ArrowType>
3927 120 : inline static void FillFieldFixedSizeList(
3928 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
3929 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
3930 : {
3931 240 : std::vector<OGRType> aValues;
3932 120 : const auto *paValues =
3933 120 : static_cast<const ArrowType *>(childArray->buffers[1]) +
3934 120 : childArray->offset + nOffsettedIndex * nItems;
3935 360 : for (int i = 0; i < nItems; ++i)
3936 : {
3937 240 : aValues.push_back(static_cast<OGRType>(paValues[i]));
3938 : }
3939 120 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
3940 : aValues.data());
3941 120 : }
3942 :
3943 : /************************************************************************/
3944 : /* FillFieldFixedSizeListString() */
3945 : /************************************************************************/
3946 :
3947 : template <typename StringOffsetType>
3948 17 : inline static void FillFieldFixedSizeListString(
3949 : const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
3950 : const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
3951 : {
3952 34 : CPLStringList aosVals;
3953 17 : const auto panSubOffsets =
3954 17 : static_cast<const StringOffsetType *>(childArray->buffers[1]) +
3955 17 : childArray->offset + nOffsettedIndex * nItems;
3956 17 : const char *pszValues = static_cast<const char *>(childArray->buffers[2]);
3957 34 : std::string osTmp;
3958 51 : for (int i = 0; i < nItems; ++i)
3959 : {
3960 34 : osTmp.assign(
3961 34 : pszValues + panSubOffsets[i],
3962 34 : static_cast<size_t>(panSubOffsets[i + 1] - panSubOffsets[i]));
3963 34 : aosVals.AddString(osTmp.c_str());
3964 : }
3965 17 : oFeature.SetField(iOGRFieldIdx, aosVals.List());
3966 17 : }
3967 :
3968 : /************************************************************************/
3969 : /* GetValue() */
3970 : /************************************************************************/
3971 :
3972 : template <typename ArrowType>
3973 245 : inline static ArrowType GetValue(const struct ArrowArray *array,
3974 : size_t iFeature)
3975 : {
3976 245 : const auto *panValues = static_cast<const ArrowType *>(array->buffers[1]);
3977 245 : return panValues[iFeature + array->offset];
3978 : }
3979 :
3980 12 : template <> bool GetValue<bool>(const struct ArrowArray *array, size_t iFeature)
3981 : {
3982 12 : const auto *pabyValues = static_cast<const uint8_t *>(array->buffers[1]);
3983 12 : return TestBit(pabyValues, iFeature + static_cast<size_t>(array->offset));
3984 : }
3985 :
3986 : /************************************************************************/
3987 : /* GetValueFloat16() */
3988 : /************************************************************************/
3989 :
3990 23 : static float GetValueFloat16(const struct ArrowArray *array, const size_t nIdx)
3991 : {
3992 23 : const auto *panValues = static_cast<const uint16_t *>(array->buffers[1]);
3993 : const auto nFloat16AsUInt32 =
3994 23 : CPLHalfToFloat(panValues[nIdx + array->offset]);
3995 : float f;
3996 23 : memcpy(&f, &nFloat16AsUInt32, sizeof(f));
3997 23 : return f;
3998 : }
3999 :
4000 : /************************************************************************/
4001 : /* GetValueDecimal() */
4002 : /************************************************************************/
4003 :
4004 71 : static double GetValueDecimal(const struct ArrowArray *array,
4005 : const int nWidthIn64BitWord, const int nScale,
4006 : const size_t nIdx)
4007 : {
4008 : #ifdef CPL_LSB
4009 71 : const auto nIdxIn64BitWord = nIdx * nWidthIn64BitWord;
4010 : #else
4011 : const auto nIdxIn64BitWord =
4012 : nIdx * nWidthIn64BitWord + nWidthIn64BitWord - 1;
4013 : #endif
4014 71 : const auto *panValues = static_cast<const int64_t *>(array->buffers[1]);
4015 71 : const auto nVal =
4016 71 : panValues[nIdxIn64BitWord + array->offset * nWidthIn64BitWord];
4017 71 : return static_cast<double>(nVal) * std::pow(10.0, -nScale);
4018 : }
4019 :
4020 : /************************************************************************/
4021 : /* GetString() */
4022 : /************************************************************************/
4023 :
// Return the string at logical index nIdx of a (large) string array
// (array->offset is added to the index).
template <class OffsetType>
static std::string GetString(const struct ArrowArray *array, const size_t nIdx)
{
    const char *pachData = static_cast<const char *>(array->buffers[2]);
    const OffsetType *panBounds =
        static_cast<const OffsetType *>(array->buffers[1]) +
        static_cast<size_t>(array->offset) + nIdx;

    // panBounds[0] / panBounds[1] delimit the bytes of the string
    const size_t nStart = static_cast<size_t>(panBounds[0]);
    const size_t nLen = static_cast<size_t>(panBounds[1] - panBounds[0]);
    return std::string(pachData + nStart, nLen);
}
4036 :
4037 : /************************************************************************/
4038 : /* GetBinaryAsBase64() */
4039 : /************************************************************************/
4040 :
4041 : template <class OffsetType>
4042 8 : static std::string GetBinaryAsBase64(const struct ArrowArray *array,
4043 : const size_t nIdx)
4044 : {
4045 8 : const OffsetType *panOffsets =
4046 8 : static_cast<const OffsetType *>(array->buffers[1]) +
4047 8 : static_cast<size_t>(array->offset) + nIdx;
4048 8 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
4049 8 : const size_t nLen = static_cast<size_t>(panOffsets[1] - panOffsets[0]);
4050 8 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
4051 : {
4052 0 : CPLError(CE_Failure, CPLE_AppDefined, "Too large binary");
4053 0 : return std::string();
4054 : }
4055 16 : char *pszVal = CPLBase64Encode(
4056 8 : static_cast<int>(nLen), pabyData + static_cast<size_t>(panOffsets[0]));
4057 16 : std::string osStr(pszVal);
4058 8 : CPLFree(pszVal);
4059 8 : return osStr;
4060 : }
4061 :
4062 : /************************************************************************/
4063 : /* GetValueFixedWithBinaryAsBase64() */
4064 : /************************************************************************/
4065 :
4066 : static std::string
4067 4 : GetValueFixedWithBinaryAsBase64(const struct ArrowArray *array,
4068 : const int nWidth, const size_t nIdx)
4069 : {
4070 4 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[1]);
4071 8 : char *pszVal = CPLBase64Encode(
4072 : nWidth,
4073 4 : pabyData + (static_cast<size_t>(array->offset) + nIdx) * nWidth);
4074 4 : std::string osStr(pszVal);
4075 4 : CPLFree(pszVal);
4076 4 : return osStr;
4077 : }
4078 :
4079 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4080 : const struct ArrowArray *array,
4081 : const size_t nIdx);
4082 :
4083 : /************************************************************************/
4084 : /* AddToArray() */
4085 : /************************************************************************/
4086 :
4087 142 : static void AddToArray(CPLJSONArray &oArray, const struct ArrowSchema *schema,
4088 : const struct ArrowArray *array, const size_t nIdx)
4089 : {
4090 142 : if (IsBoolean(schema->format))
4091 7 : oArray.Add(GetValue<bool>(array, nIdx));
4092 135 : else if (IsUInt8(schema->format))
4093 13 : oArray.Add(GetValue<uint8_t>(array, nIdx));
4094 122 : else if (IsInt8(schema->format))
4095 7 : oArray.Add(GetValue<int8_t>(array, nIdx));
4096 115 : else if (IsUInt16(schema->format))
4097 7 : oArray.Add(GetValue<uint16_t>(array, nIdx));
4098 108 : else if (IsInt16(schema->format))
4099 7 : oArray.Add(GetValue<int16_t>(array, nIdx));
4100 101 : else if (IsUInt32(schema->format))
4101 7 : oArray.Add(static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
4102 94 : else if (IsInt32(schema->format))
4103 7 : oArray.Add(GetValue<int32_t>(array, nIdx));
4104 87 : else if (IsUInt64(schema->format))
4105 7 : oArray.Add(GetValue<uint64_t>(array, nIdx));
4106 80 : else if (IsInt64(schema->format))
4107 7 : oArray.Add(static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
4108 73 : else if (IsFloat16(schema->format))
4109 7 : oArray.Add(static_cast<double>(GetValueFloat16(array, nIdx)));
4110 66 : else if (IsFloat32(schema->format))
4111 7 : oArray.Add(static_cast<double>(GetValue<float>(array, nIdx)));
4112 59 : else if (IsFloat64(schema->format))
4113 7 : oArray.Add(GetValue<double>(array, nIdx));
4114 52 : else if (IsString(schema->format))
4115 13 : oArray.Add(GetString<uint32_t>(array, nIdx));
4116 39 : else if (IsLargeString(schema->format))
4117 4 : oArray.Add(GetString<uint64_t>(array, nIdx));
4118 35 : else if (IsBinary(schema->format))
4119 2 : oArray.Add(GetBinaryAsBase64<uint32_t>(array, nIdx));
4120 33 : else if (IsLargeBinary(schema->format))
4121 2 : oArray.Add(GetBinaryAsBase64<uint64_t>(array, nIdx));
4122 31 : else if (IsFixedWidthBinary(schema->format))
4123 2 : oArray.Add(GetValueFixedWithBinaryAsBase64(
4124 2 : array, GetFixedWithBinary(schema->format), nIdx));
4125 29 : else if (IsDecimal(schema->format))
4126 : {
4127 7 : int nPrecision = 0;
4128 7 : int nScale = 0;
4129 7 : int nWidthInBytes = 0;
4130 7 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
4131 7 : nWidthInBytes);
4132 : // Already validated
4133 7 : CPLAssert(bOK);
4134 7 : CPL_IGNORE_RET_VAL(bOK);
4135 7 : oArray.Add(GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
4136 : }
4137 : else
4138 22 : oArray.Add(GetObjectAsJSON(schema, array, nIdx));
4139 142 : }
4140 :
4141 : /************************************************************************/
4142 : /* GetListAsJSON() */
4143 : /************************************************************************/
4144 :
4145 : template <class OffsetType>
4146 112 : static CPLJSONArray GetListAsJSON(const struct ArrowSchema *schema,
4147 : const struct ArrowArray *array,
4148 : const size_t nIdx)
4149 : {
4150 112 : CPLJSONArray oArray;
4151 112 : const auto panOffsets = static_cast<const OffsetType *>(array->buffers[1]) +
4152 112 : array->offset + nIdx;
4153 112 : const auto childSchema = schema->children[0];
4154 112 : const auto childArray = array->children[0];
4155 5 : const uint8_t *pabyValidity =
4156 112 : childArray->null_count == 0
4157 : ? nullptr
4158 107 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4159 278 : for (size_t k = static_cast<size_t>(panOffsets[0]);
4160 278 : k < static_cast<size_t>(panOffsets[1]); k++)
4161 : {
4162 318 : if (!pabyValidity ||
4163 152 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4164 : {
4165 136 : AddToArray(oArray, childSchema, childArray, k);
4166 : }
4167 : else
4168 : {
4169 30 : oArray.AddNull();
4170 : }
4171 : }
4172 112 : return oArray;
4173 : }
4174 :
4175 : /************************************************************************/
4176 : /* GetFixedSizeListAsJSON() */
4177 : /************************************************************************/
4178 :
4179 3 : static CPLJSONArray GetFixedSizeListAsJSON(const struct ArrowSchema *schema,
4180 : const struct ArrowArray *array,
4181 : const size_t nIdx)
4182 : {
4183 3 : CPLJSONArray oArray;
4184 3 : const int nVals = GetFixedSizeList(schema->format);
4185 3 : const auto childSchema = schema->children[0];
4186 3 : const auto childArray = array->children[0];
4187 3 : const uint8_t *pabyValidity =
4188 3 : childArray->null_count == 0
4189 3 : ? nullptr
4190 3 : : static_cast<const uint8_t *>(childArray->buffers[0]);
4191 9 : for (size_t k = nIdx * nVals; k < (nIdx + 1) * nVals; k++)
4192 : {
4193 12 : if (!pabyValidity ||
4194 6 : TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4195 : {
4196 6 : AddToArray(oArray, childSchema, childArray, k);
4197 : }
4198 : else
4199 : {
4200 0 : oArray.AddNull();
4201 : }
4202 : }
4203 3 : return oArray;
4204 : }
4205 :
4206 : /************************************************************************/
4207 : /* AddToDict() */
4208 : /************************************************************************/
4209 :
4210 198 : static void AddToDict(CPLJSONObject &oDict, const std::string &osKey,
4211 : const struct ArrowSchema *schema,
4212 : const struct ArrowArray *array, const size_t nIdx)
4213 : {
4214 198 : if (IsBoolean(schema->format))
4215 5 : oDict.Add(osKey, GetValue<bool>(array, nIdx));
4216 193 : else if (IsUInt8(schema->format))
4217 5 : oDict.Add(osKey, GetValue<uint8_t>(array, nIdx));
4218 188 : else if (IsInt8(schema->format))
4219 5 : oDict.Add(osKey, GetValue<int8_t>(array, nIdx));
4220 183 : else if (IsUInt16(schema->format))
4221 5 : oDict.Add(osKey, GetValue<uint16_t>(array, nIdx));
4222 178 : else if (IsInt16(schema->format))
4223 5 : oDict.Add(osKey, GetValue<int16_t>(array, nIdx));
4224 173 : else if (IsUInt32(schema->format))
4225 2 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
4226 171 : else if (IsInt32(schema->format))
4227 6 : oDict.Add(osKey, GetValue<int32_t>(array, nIdx));
4228 165 : else if (IsUInt64(schema->format))
4229 5 : oDict.Add(osKey, GetValue<uint64_t>(array, nIdx));
4230 160 : else if (IsInt64(schema->format))
4231 22 : oDict.Add(osKey, static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
4232 138 : else if (IsFloat16(schema->format))
4233 2 : oDict.Add(osKey, static_cast<double>(GetValueFloat16(array, nIdx)));
4234 136 : else if (IsFloat32(schema->format))
4235 5 : oDict.Add(osKey, static_cast<double>(GetValue<float>(array, nIdx)));
4236 131 : else if (IsFloat64(schema->format))
4237 19 : oDict.Add(osKey, GetValue<double>(array, nIdx));
4238 112 : else if (IsString(schema->format))
4239 14 : oDict.Add(osKey, GetString<uint32_t>(array, nIdx));
4240 98 : else if (IsLargeString(schema->format))
4241 2 : oDict.Add(osKey, GetString<uint64_t>(array, nIdx));
4242 96 : else if (IsBinary(schema->format))
4243 2 : oDict.Add(osKey, GetBinaryAsBase64<uint32_t>(array, nIdx));
4244 94 : else if (IsLargeBinary(schema->format))
4245 2 : oDict.Add(osKey, GetBinaryAsBase64<uint64_t>(array, nIdx));
4246 92 : else if (IsFixedWidthBinary(schema->format))
4247 2 : oDict.Add(osKey, GetValueFixedWithBinaryAsBase64(
4248 2 : array, GetFixedWithBinary(schema->format), nIdx));
4249 90 : else if (IsDecimal(schema->format))
4250 : {
4251 8 : int nPrecision = 0;
4252 8 : int nScale = 0;
4253 8 : int nWidthInBytes = 0;
4254 8 : const bool bOK = ParseDecimalFormat(schema->format, nPrecision, nScale,
4255 8 : nWidthInBytes);
4256 : // Already validated
4257 8 : CPLAssert(bOK);
4258 8 : CPL_IGNORE_RET_VAL(bOK);
4259 8 : oDict.Add(osKey,
4260 : GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
4261 : }
4262 : else
4263 82 : oDict.Add(osKey, GetObjectAsJSON(schema, array, nIdx));
4264 198 : }
4265 :
4266 : /************************************************************************/
4267 : /* GetMapAsJSON() */
4268 : /************************************************************************/
4269 :
// Builds a JSON object from the map entry found at row nIdx of an Arrow map
// array.
//
// schema / array : the map column. schema->children[0] must have a structure
//                  format, and its first child (the key) must have a string
//                  format; otherwise a CE_Failure error is emitted and a
//                  default-constructed CPLJSONObject is returned.
// nIdx           : row index; array->offset is added to it here when
//                  locating the row's entry offsets.
//
// Entries whose key is flagged null by the key validity bitmap are skipped.
// Entries whose value is flagged null are added with AddNull(); all other
// values are added through AddToDict().
4270 243 : static CPLJSONObject GetMapAsJSON(const struct ArrowSchema *schema,
4271 : const struct ArrowArray *array,
4272 : const size_t nIdx)
4273 : {
4274 243 : const auto schemaStruct = schema->children[0];
4275 243 : if (!IsStructure(schemaStruct->format))
4276 : {
4277 0 : CPLError(CE_Failure, CPLE_AppDefined,
4278 : "GetMapAsJSON(): !IsStructure(schemaStruct->format))");
4279 0 : return CPLJSONObject();
4280 : }
4281 243 : const auto schemaKey = schemaStruct->children[0];
4282 243 : const auto schemaValues = schemaStruct->children[1];
4283 243 : if (!IsString(schemaKey->format))
4284 : {
4285 0 : CPLError(CE_Failure, CPLE_AppDefined,
4286 : "GetMapAsJSON(): !IsString(schemaKey->format))");
4287 0 : return CPLJSONObject();
4288 : }
4289 243 : const auto arrayKeys = array->children[0]->children[0];
4290 243 : const auto arrayValues = array->children[0]->children[1];
4291 :
4292 486 : CPLJSONObject oDict;
// panOffsets[0] / panOffsets[1] delimit the entries [first, last) of this row.
4293 243 : const auto panOffsets =
4294 243 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset + nIdx;
// A null validity pointer (null_count == 0) means "every element is valid".
4295 243 : const uint8_t *pabyValidityKeys =
4296 243 : arrayKeys->null_count == 0
4297 243 : ? nullptr
4298 0 : : static_cast<const uint8_t *>(arrayKeys->buffers[0]);
// Key string offsets, already shifted by the key array's own offset.
4299 243 : const uint32_t *panOffsetsKeys =
4300 243 : static_cast<const uint32_t *>(arrayKeys->buffers[1]) +
4301 243 : arrayKeys->offset;
4302 243 : const char *pabyKeys = static_cast<const char *>(arrayKeys->buffers[2]);
4303 243 : const uint8_t *pabyValidityValues =
4304 243 : arrayValues->null_count == 0
4305 243 : ? nullptr
4306 237 : : static_cast<const uint8_t *>(arrayValues->buffers[0]);
4307 463 : for (uint32_t k = panOffsets[0]; k < panOffsets[1]; k++)
4308 : {
// Only emit the entry when its key is valid; null keys are silently dropped.
4309 220 : if (!pabyValidityKeys ||
4310 0 : TestBit(pabyValidityKeys,
4311 0 : k + static_cast<size_t>(arrayKeys->offset)))
4312 : {
4313 440 : std::string osKey;
4314 220 : osKey.assign(pabyKeys + panOffsetsKeys[k],
4315 220 : panOffsetsKeys[k + 1] - panOffsetsKeys[k]);
4316 :
4317 433 : if (!pabyValidityValues ||
4318 213 : TestBit(pabyValidityValues,
4319 213 : k + static_cast<size_t>(arrayValues->offset)))
4320 : {
// k is passed without the value array's offset applied.
4321 168 : AddToDict(oDict, osKey, schemaValues, arrayValues, k);
4322 : }
4323 : else
4324 : {
4325 52 : oDict.AddNull(osKey);
4326 : }
4327 : }
4328 : }
4329 243 : return oDict;
4330 : }
4331 :
4332 : /************************************************************************/
4333 : /* GetStructureAsJSON() */
4334 : /************************************************************************/
4335 :
// Builds a JSON object from row nIdx of an Arrow structure array, with one
// member per child array, named after schema->children[k]->name.
//
// A child whose validity bitmap flags the element as null is added with
// AddNull(); otherwise the value is added through AddToDict().
//
// NOTE(review): schema->children[] is indexed using array->n_children, so
// this assumes schema and array have the same number of children - confirm.
4336 16 : static CPLJSONObject GetStructureAsJSON(const struct ArrowSchema *schema,
4337 : const struct ArrowArray *array,
4338 : const size_t nIdx)
4339 : {
4340 16 : CPLJSONObject oDict;
4341 62 : for (int64_t k = 0; k < array->n_children; k++)
4342 : {
// A null validity pointer (null_count == 0) means "every element is valid".
4343 46 : const uint8_t *pabyValidityValues =
4344 46 : array->children[k]->null_count == 0
4345 46 : ? nullptr
4346 36 : : static_cast<const uint8_t *>(array->children[k]->buffers[0]);
// The validity bit is looked up at nIdx shifted by the child's own offset.
4347 82 : if (!pabyValidityValues ||
4348 36 : TestBit(pabyValidityValues,
4349 36 : nIdx + static_cast<size_t>(array->children[k]->offset)))
4350 : {
4351 30 : AddToDict(oDict, schema->children[k]->name, schema->children[k],
4352 30 : array->children[k], nIdx);
4353 : }
4354 : else
4355 : {
4356 16 : oDict.AddNull(schema->children[k]->name);
4357 : }
4358 : }
4359 16 : return oDict;
4360 : }
4361 :
4362 : /************************************************************************/
4363 : /* GetObjectAsJSON() */
4364 : /************************************************************************/
4365 :
// Converts element nIdx of a nested Arrow array to JSON by dispatching on
// schema->format, tested in this order: map, list, large list, fixed size
// list, structure.
//
// Lists use GetListAsJSON<uint32_t>, large lists GetListAsJSON<uint64_t>.
// For any other format a CE_Failure error naming the format is emitted and a
// default-constructed CPLJSONObject is returned.
4366 104 : static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4367 : const struct ArrowArray *array,
4368 : const size_t nIdx)
4369 : {
4370 104 : if (IsMap(schema->format))
4371 4 : return GetMapAsJSON(schema, array, nIdx);
4372 100 : else if (IsList(schema->format))
4373 156 : return GetListAsJSON<uint32_t>(schema, array, nIdx);
4374 22 : else if (IsLargeList(schema->format))
4375 6 : return GetListAsJSON<uint64_t>(schema, array, nIdx);
4376 19 : else if (IsFixedSizeList(schema->format))
4377 6 : return GetFixedSizeListAsJSON(schema, array, nIdx);
4378 16 : else if (IsStructure(schema->format))
4379 16 : return GetStructureAsJSON(schema, array, nIdx);
4380 : else
4381 : {
4382 0 : CPLError(CE_Failure, CPLE_AppDefined,
4383 : "GetObjectAsJSON(): unhandled value format: %s",
4384 0 : schema->format);
4385 0 : return CPLJSONObject();
4386 : }
4387 : }
4388 :
4389 : /************************************************************************/
4390 : /* SetFieldForOtherFormats() */
4391 : /************************************************************************/
4392 :
// Sets field iOGRFieldIndex of oFeature from one element of an Arrow array,
// for the formats handled below: float16, fixed width binary, date32/date64,
// time32/time64, timestamps, fixed size lists, (large) lists, decimals and
// maps.
//
// oFeature        : feature receiving the value.
// iOGRFieldIndex  : index of the OGR field to set.
// nOffsettedIndex : element index used directly on array->buffers[]. Where a
//                   helper is called, array->offset is subtracted from it
//                   first, so it presumably already includes array->offset -
//                   confirm against the callers.
// schema / array  : Arrow schema and array of the column.
//
// Returns false only when schema->format matches none of the branches; every
// other path returns true.
4393 856 : static bool SetFieldForOtherFormats(OGRFeature &oFeature,
4394 : const int iOGRFieldIndex,
4395 : const size_t nOffsettedIndex,
4396 : const struct ArrowSchema *schema,
4397 : const struct ArrowArray *array)
4398 : {
4399 856 : const char *format = schema->format;
4400 856 : if (IsFloat16(format))
4401 : {
// GetValueFloat16() is given the index with array->offset removed.
4402 4 : oFeature.SetField(
4403 : iOGRFieldIndex,
4404 4 : static_cast<double>(GetValueFloat16(
4405 4 : array, nOffsettedIndex - static_cast<size_t>(array->offset))));
4406 : }
4407 :
4408 852 : else if (IsFixedWidthBinary(format))
4409 : {
4410 : // Fixed width binary
4411 17 : const int nWidth = GetFixedWithBinary(format);
4412 17 : oFeature.SetField(iOGRFieldIndex, nWidth,
4413 17 : static_cast<const GByte *>(array->buffers[1]) +
4414 17 : nOffsettedIndex * nWidth);
4415 : }
4416 835 : else if (format[0] == 't' && format[1] == 'd' &&
4417 38 : format[2] == 'D') // strcmp(format, "tdD") == 0
4418 : {
4419 : // date32[days]
4420 : // number of days since Epoch
// Converted to seconds; only year/month/day are stored in the field.
4421 33 : int64_t timestamp = static_cast<int64_t>(static_cast<const int32_t *>(
4422 33 : array->buffers[1])[nOffsettedIndex]) *
4423 : 3600 * 24;
4424 : struct tm dt;
4425 33 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4426 33 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4427 : dt.tm_mday, 0, 0, 0);
4428 33 : return true;
4429 : }
4430 802 : else if (format[0] == 't' && format[1] == 'd' &&
4431 5 : format[2] == 'm') // strcmp(format, "tdm") == 0
4432 : {
4433 : // date64[milliseconds]
4434 : // number of milliseconds since Epoch
// Integer division by 1000 yields seconds; only year/month/day are stored.
4435 5 : int64_t timestamp =
4436 5 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex] /
4437 : 1000;
4438 : struct tm dt;
4439 5 : CPLUnixTimeToYMDHMS(timestamp, &dt);
4440 5 : oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4441 5 : dt.tm_mday, 0, 0, 0);
4442 : }
4443 797 : else if (format[0] == 't' && format[1] == 't' &&
4444 39 : format[2] == 's') // strcmp(format, "tts") == 0
4445 : {
4446 : // time32 [seconds]
4447 0 : int32_t value =
4448 0 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4449 0 : const int nHour = value / 3600;
4450 0 : const int nMinute = (value / 60) % 60;
4451 0 : const int nSecond = value % 60;
4452 0 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4453 0 : static_cast<float>(nSecond));
4454 : }
4455 797 : else if (format[0] == 't' && format[1] == 't' &&
4456 39 : format[2] == 'm') // strcmp(format, "ttm") == 0
4457 : {
4458 : // time32 [milliseconds]
// The sub-second part is kept and added back to the seconds component.
4459 25 : int32_t value =
4460 25 : static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4461 25 : double floatingPart = (value % 1000) / 1e3;
4462 25 : value /= 1000;
4463 25 : const int nHour = value / 3600;
4464 25 : const int nMinute = (value / 60) % 60;
4465 25 : const int nSecond = value % 60;
4466 25 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4467 25 : static_cast<float>(nSecond + floatingPart));
4468 : }
4469 772 : else if (format[0] == 't' && format[1] == 't' &&
4470 14 : (format[2] == 'u' || // time64 [microseconds]
4471 7 : format[2] == 'n')) // time64 [nanoseconds]
4472 : {
4473 14 : int64_t value =
4474 14 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex];
// When the OGR field is Integer64 the raw value is stored unchanged;
// otherwise it is split into hour / minute / fractional seconds.
4475 14 : if (oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() == OFTInteger64)
4476 : {
4477 2 : oFeature.SetField(iOGRFieldIndex, static_cast<GIntBig>(value));
4478 : }
4479 : else
4480 : {
4481 : double floatingPart;
4482 12 : if (format[2] == 'u')
4483 : {
4484 5 : floatingPart = (value % (1000 * 1000)) / 1e6;
4485 5 : value /= 1000 * 1000;
4486 : }
4487 : else
4488 : {
4489 7 : floatingPart = (value % (1000 * 1000 * 1000)) / 1e9;
4490 7 : value /= 1000 * 1000 * 1000;
4491 : }
4492 12 : const int nHour = static_cast<int>(value / 3600);
4493 12 : const int nMinute = static_cast<int>((value / 60) % 60);
4494 12 : const int nSecond = static_cast<int>(value % 60);
4495 12 : oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4496 12 : static_cast<float>(nSecond + floatingPart));
4497 14 : }
4498 : }
// Timestamps: the second argument is presumably the number of units per
// second for the given resolution - confirm against
// ArrowTimestampToOGRDateTime().
4499 758 : else if (IsTimestampSeconds(format))
4500 : {
4501 0 : ArrowTimestampToOGRDateTime(
4502 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex], 1,
4503 : GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4504 : }
4505 758 : else if (IsTimestampMilliseconds(format))
4506 : {
4507 73 : ArrowTimestampToOGRDateTime(
4508 73 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4509 : 1000, GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4510 : }
4511 685 : else if (IsTimestampMicroseconds(format))
4512 : {
4513 34 : ArrowTimestampToOGRDateTime(
4514 34 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4515 : 1000 * 1000, GetTimestampTimezone(format), oFeature,
4516 : iOGRFieldIndex);
4517 : }
4518 651 : else if (IsTimestampNanoseconds(format))
4519 : {
4520 0 : ArrowTimestampToOGRDateTime(
4521 0 : static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4522 : 1000 * 1000 * 1000, GetTimestampTimezone(format), oFeature,
4523 : iOGRFieldIndex);
4524 : }
4525 651 : else if (IsFixedSizeList(format))
4526 : {
// Fixed size list: dispatch on the child format.
// NOTE(review): this inner chain has no final else. A child format matching
// none of the branches leaves the field untouched and the function still
// returns true - confirm that callers never reach here with such a child.
4527 154 : const int nItems = GetFixedSizeList(format);
4528 154 : const auto childArray = array->children[0];
4529 154 : const char *childFormat = schema->children[0]->format;
4530 154 : if (IsBoolean(childFormat))
4531 : {
4532 24 : std::vector<int> aValues;
4533 12 : const auto *paValues =
4534 12 : static_cast<const uint8_t *>(childArray->buffers[1]);
4535 36 : for (int i = 0; i < nItems; ++i)
4536 : {
// Bit position = child offset + (list index * items per list) + i.
4537 24 : aValues.push_back(
4538 24 : TestBit(paValues,
4539 24 : static_cast<size_t>(childArray->offset +
4540 24 : nOffsettedIndex * nItems + i))
4541 24 : ? 1
4542 : : 0);
4543 : }
4544 12 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4545 12 : aValues.data());
4546 : }
4547 142 : else if (IsInt8(childFormat))
4548 : {
4549 12 : FillFieldFixedSizeList<int8_t, int>(array, iOGRFieldIndex,
4550 : nOffsettedIndex, nItems,
4551 : childArray, oFeature);
4552 : }
4553 130 : else if (IsUInt8(childFormat))
4554 : {
4555 12 : FillFieldFixedSizeList<uint8_t, int>(array, iOGRFieldIndex,
4556 : nOffsettedIndex, nItems,
4557 : childArray, oFeature);
4558 : }
4559 118 : else if (IsInt16(childFormat))
4560 : {
4561 12 : FillFieldFixedSizeList<int16_t, int>(array, iOGRFieldIndex,
4562 : nOffsettedIndex, nItems,
4563 : childArray, oFeature);
4564 : }
4565 106 : else if (IsUInt16(childFormat))
4566 : {
4567 12 : FillFieldFixedSizeList<uint16_t, int>(array, iOGRFieldIndex,
4568 : nOffsettedIndex, nItems,
4569 : childArray, oFeature);
4570 : }
4571 94 : else if (IsInt32(childFormat))
4572 : {
4573 12 : FillFieldFixedSizeList<int32_t, int>(array, iOGRFieldIndex,
4574 : nOffsettedIndex, nItems,
4575 : childArray, oFeature);
4576 : }
4577 82 : else if (IsUInt32(childFormat))
4578 : {
4579 5 : FillFieldFixedSizeList<uint32_t, GIntBig>(array, iOGRFieldIndex,
4580 : nOffsettedIndex, nItems,
4581 : childArray, oFeature);
4582 : }
4583 77 : else if (IsInt64(childFormat))
4584 : {
4585 19 : FillFieldFixedSizeList<int64_t, GIntBig>(array, iOGRFieldIndex,
4586 : nOffsettedIndex, nItems,
4587 : childArray, oFeature);
4588 : }
4589 58 : else if (IsUInt64(childFormat))
4590 : {
4591 12 : FillFieldFixedSizeList<uint64_t, double>(array, iOGRFieldIndex,
4592 : nOffsettedIndex, nItems,
4593 : childArray, oFeature);
4594 : }
4595 46 : else if (IsFloat16(childFormat))
4596 : {
4597 10 : std::vector<double> aValues;
4598 15 : for (int i = 0; i < nItems; ++i)
4599 : {
4600 10 : aValues.push_back(static_cast<double>(
4601 10 : GetValueFloat16(childArray, nOffsettedIndex * nItems + i)));
4602 : }
4603 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4604 5 : aValues.data());
4605 : }
4606 41 : else if (IsFloat32(childFormat))
4607 : {
4608 12 : FillFieldFixedSizeList<float, double>(array, iOGRFieldIndex,
4609 : nOffsettedIndex, nItems,
4610 : childArray, oFeature);
4611 : }
4612 29 : else if (IsFloat64(childFormat))
4613 : {
4614 12 : FillFieldFixedSizeList<double, double>(array, iOGRFieldIndex,
4615 : nOffsettedIndex, nItems,
4616 : childArray, oFeature);
4617 : }
4618 17 : else if (IsString(childFormat))
4619 : {
4620 12 : FillFieldFixedSizeListString<uint32_t>(array, iOGRFieldIndex,
4621 : nOffsettedIndex, nItems,
4622 : childArray, oFeature);
4623 : }
4624 5 : else if (IsLargeString(childFormat))
4625 : {
4626 5 : FillFieldFixedSizeListString<uint64_t>(array, iOGRFieldIndex,
4627 : nOffsettedIndex, nItems,
4628 : childArray, oFeature);
4629 : }
4630 : }
4631 497 : else if (IsList(format) || IsLargeList(format))
4632 : {
// List / large list: format[1] == ARROW_2ND_LETTER_LIST selects the
// uint32_t-offset helper instantiation, anything else the uint64_t one.
// Child formats without a dedicated branch are serialized as JSON text.
4633 254 : const auto childArray = array->children[0];
4634 254 : const char *childFormat = schema->children[0]->format;
4635 254 : if (IsBoolean(childFormat))
4636 : {
4637 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4638 12 : FillFieldListFromBool<uint32_t>(array, iOGRFieldIndex,
4639 : nOffsettedIndex, childArray,
4640 : oFeature);
4641 : else
4642 4 : FillFieldListFromBool<uint64_t>(array, iOGRFieldIndex,
4643 : nOffsettedIndex, childArray,
4644 : oFeature);
4645 : }
4646 238 : else if (IsInt8(childFormat))
4647 : {
4648 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4649 10 : FillFieldList<uint32_t, int8_t, int>(array, iOGRFieldIndex,
4650 : nOffsettedIndex,
4651 : childArray, oFeature);
4652 : else
4653 4 : FillFieldList<uint64_t, int8_t, int>(array, iOGRFieldIndex,
4654 : nOffsettedIndex,
4655 : childArray, oFeature);
4656 : }
4657 224 : else if (IsUInt8(childFormat))
4658 : {
4659 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4660 14 : FillFieldList<uint32_t, uint8_t, int>(array, iOGRFieldIndex,
4661 : nOffsettedIndex,
4662 : childArray, oFeature);
4663 : else
4664 4 : FillFieldList<uint64_t, uint8_t, int>(array, iOGRFieldIndex,
4665 : nOffsettedIndex,
4666 : childArray, oFeature);
4667 : }
4668 206 : else if (IsInt16(childFormat))
4669 : {
4670 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4671 12 : FillFieldList<uint32_t, int16_t, int>(array, iOGRFieldIndex,
4672 : nOffsettedIndex,
4673 : childArray, oFeature);
4674 : else
4675 4 : FillFieldList<uint64_t, int16_t, int>(array, iOGRFieldIndex,
4676 : nOffsettedIndex,
4677 : childArray, oFeature);
4678 : }
4679 190 : else if (IsUInt16(childFormat))
4680 : {
4681 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4682 10 : FillFieldList<uint32_t, uint16_t, int>(array, iOGRFieldIndex,
4683 : nOffsettedIndex,
4684 : childArray, oFeature);
4685 : else
4686 4 : FillFieldList<uint64_t, uint16_t, int>(array, iOGRFieldIndex,
4687 : nOffsettedIndex,
4688 : childArray, oFeature);
4689 : }
4690 176 : else if (IsInt32(childFormat))
4691 : {
4692 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4693 14 : FillFieldList<uint32_t, int32_t, int>(array, iOGRFieldIndex,
4694 : nOffsettedIndex,
4695 : childArray, oFeature);
4696 : else
4697 4 : FillFieldList<uint64_t, int32_t, int>(array, iOGRFieldIndex,
4698 : nOffsettedIndex,
4699 : childArray, oFeature);
4700 : }
4701 158 : else if (IsUInt32(childFormat))
4702 : {
4703 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
4704 4 : FillFieldList<uint32_t, uint32_t, GIntBig>(
4705 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4706 : oFeature);
4707 : else
4708 4 : FillFieldList<uint64_t, uint32_t, GIntBig>(
4709 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4710 : oFeature);
4711 : }
4712 150 : else if (IsInt64(childFormat))
4713 : {
4714 31 : if (format[1] == ARROW_2ND_LETTER_LIST)
4715 27 : FillFieldList<uint32_t, int64_t, GIntBig>(array, iOGRFieldIndex,
4716 : nOffsettedIndex,
4717 : childArray, oFeature);
4718 : else
4719 4 : FillFieldList<uint64_t, int64_t, GIntBig>(array, iOGRFieldIndex,
4720 : nOffsettedIndex,
4721 : childArray, oFeature);
4722 : }
4723 119 : else if (IsUInt64(childFormat)) // (lossy conversion)
4724 : {
4725 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4726 10 : FillFieldList<uint32_t, uint64_t, double>(array, iOGRFieldIndex,
4727 : nOffsettedIndex,
4728 : childArray, oFeature);
4729 : else
4730 4 : FillFieldList<uint64_t, uint64_t, double>(array, iOGRFieldIndex,
4731 : nOffsettedIndex,
4732 : childArray, oFeature);
4733 : }
4734 105 : else if (IsFloat16(childFormat))
4735 : {
4736 8 : if (format[1] == ARROW_2ND_LETTER_LIST)
4737 4 : FillFieldListFromHalfFloat<uint32_t>(array, iOGRFieldIndex,
4738 : nOffsettedIndex,
4739 : childArray, oFeature);
4740 : else
4741 4 : FillFieldListFromHalfFloat<uint64_t>(array, iOGRFieldIndex,
4742 : nOffsettedIndex,
4743 : childArray, oFeature);
4744 : }
4745 97 : else if (IsFloat32(childFormat))
4746 : {
4747 16 : if (format[1] == ARROW_2ND_LETTER_LIST)
4748 12 : FillFieldList<uint32_t, float, double>(array, iOGRFieldIndex,
4749 : nOffsettedIndex,
4750 : childArray, oFeature);
4751 : else
4752 4 : FillFieldList<uint64_t, float, double>(array, iOGRFieldIndex,
4753 : nOffsettedIndex,
4754 : childArray, oFeature);
4755 : }
4756 81 : else if (IsFloat64(childFormat))
4757 : {
4758 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4759 14 : FillFieldList<uint32_t, double, double>(array, iOGRFieldIndex,
4760 : nOffsettedIndex,
4761 : childArray, oFeature);
4762 : else
4763 4 : FillFieldList<uint64_t, double, double>(array, iOGRFieldIndex,
4764 : nOffsettedIndex,
4765 : childArray, oFeature);
4766 : }
4767 63 : else if (IsString(childFormat))
4768 : {
4769 18 : if (format[1] == ARROW_2ND_LETTER_LIST)
4770 14 : FillFieldListFromString<uint32_t, uint32_t>(
4771 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4772 : oFeature);
4773 : else
4774 4 : FillFieldListFromString<uint64_t, uint32_t>(
4775 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4776 : oFeature);
4777 : }
4778 45 : else if (IsLargeString(childFormat))
4779 : {
4780 14 : if (format[1] == ARROW_2ND_LETTER_LIST)
4781 10 : FillFieldListFromString<uint32_t, uint64_t>(
4782 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4783 : oFeature);
4784 : else
4785 4 : FillFieldListFromString<uint64_t, uint64_t>(
4786 : array, iOGRFieldIndex, nOffsettedIndex, childArray,
4787 : oFeature);
4788 : }
4789 31 : else if (format[1] == ARROW_2ND_LETTER_LIST)
4790 : {
// Fallback for other child formats: store the list as plain JSON text.
4791 31 : const size_t iFeature =
4792 31 : static_cast<size_t>(nOffsettedIndex - array->offset);
4793 31 : oFeature.SetField(iOGRFieldIndex,
4794 62 : GetListAsJSON<uint32_t>(schema, array, iFeature)
4795 62 : .Format(CPLJSONObject::PrettyFormat::Plain)
4796 : .c_str());
4797 : }
4798 : else
4799 : {
4800 0 : const size_t iFeature =
4801 0 : static_cast<size_t>(nOffsettedIndex - array->offset);
4802 0 : oFeature.SetField(iOGRFieldIndex,
4803 0 : GetListAsJSON<uint64_t>(schema, array, iFeature)
4804 0 : .Format(CPLJSONObject::PrettyFormat::Plain)
4805 : .c_str());
4806 : }
4807 : }
4808 243 : else if (IsDecimal(format))
4809 : {
4810 4 : int nPrecision = 0;
4811 4 : int nScale = 0;
4812 4 : int nWidthInBytes = 0;
// A parse failure and the two expectations below are only checked through
// CPLAssert(); there is no other error handling on this path.
4813 4 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
4814 : {
4815 0 : CPLAssert(false);
4816 : }
4817 :
4818 : // fits on a int64
4819 4 : CPLAssert(nPrecision <= 19);
4820 : // either 128 or 256 bits
4821 4 : CPLAssert((nWidthInBytes % 8) == 0);
4822 4 : const int nWidthIn64BitWord = nWidthInBytes / 8;
4823 4 : const size_t iFeature =
4824 4 : static_cast<size_t>(nOffsettedIndex - array->offset);
4825 4 : oFeature.SetField(
4826 : iOGRFieldIndex,
4827 : GetValueDecimal(array, nWidthIn64BitWord, nScale, iFeature));
4828 4 : return true;
4829 : }
4830 239 : else if (IsMap(format))
4831 : {
// Maps are stored as plain JSON text; GetMapAsJSON() re-applies array->offset.
4832 239 : const size_t iFeature =
4833 239 : static_cast<size_t>(nOffsettedIndex - array->offset);
4834 239 : oFeature.SetField(iOGRFieldIndex,
4835 478 : GetMapAsJSON(schema, array, iFeature)
4836 478 : .Format(CPLJSONObject::PrettyFormat::Plain)
4837 : .c_str());
4838 : }
4839 : else
4840 : {
// No branch recognized the format: report it to the caller.
4841 0 : return false;
4842 : }
4843 :
4844 819 : return true;
4845 : }
4846 :
4847 : /************************************************************************/
4848 : /* FillValidityArrayFromAttrQuery() */
4849 : /************************************************************************/
4850 :
// Evaluates the attribute query on every row that is still selected in
// abyValidityFromFilters and clears the flag of the rows that do not match.
//
// poLayer                : layer providing the feature definition and the
//                          FID column name.
// poAttrQuery            : compiled attribute query to evaluate.
// schema / array         : Arrow batch to filter; rows are indexed from 0 to
//                          abyValidityFromFilters.size() - 1.
// abyValidityFromFilters : in/out per-row selection flags. Rows already
//                          false on entry are skipped.
// papszOptions           : only BASE_SEQUENTIAL_FID is read here.
//
// Returns the number of rows for which the query evaluated to true. On a
// "too large binary" or "unhandled field format" error, every flag is reset
// to false, a CE_Failure error is emitted and 0 is returned.
//
// Only the fields reported by poAttrQuery->GetUsedFields() (and the FID, when
// requested) are copied into a scratch OGRFeature before each evaluation.
4851 134 : static size_t FillValidityArrayFromAttrQuery(
4852 : const OGRLayer *poLayer, OGRFeatureQuery *poAttrQuery,
4853 : const struct ArrowSchema *schema, struct ArrowArray *array,
4854 : std::vector<bool> &abyValidityFromFilters, CSLConstList papszOptions)
4855 : {
4856 134 : size_t nCountIntersecting = 0;
4857 134 : auto poFeatureDefn = const_cast<OGRLayer *>(poLayer)->GetLayerDefn();
// Scratch feature reused for every row.
4858 268 : OGRFeature oFeature(poFeatureDefn);
4859 :
4860 268 : std::map<std::string, std::vector<int>> oMapFieldNameToArrowPath;
4861 268 : std::vector<int> anArrowPathTmp;
4862 134 : BuildMapFieldNameToArrowPath(schema, oMapFieldNameToArrowPath,
4863 268 : std::string(), anArrowPathTmp);
4864 :
// Pairs an OGR field index with the list of child indices leading from the
// top-level schema/array to the Arrow column holding that field.
4865 : struct UsedFieldsInfo
4866 : {
4867 : int iOGRFieldIndex{};
4868 : std::vector<int> anArrowPath{};
4869 : };
4870 :
4871 268 : std::vector<UsedFieldsInfo> aoUsedFieldsInfo;
4872 :
4873 134 : bool bNeedsFID = false;
4874 268 : const CPLStringList aosUsedFields(poAttrQuery->GetUsedFields());
4875 252 : for (int i = 0; i < aosUsedFields.size(); ++i)
4876 : {
4877 118 : int iOGRFieldIndex = poFeatureDefn->GetFieldIndex(aosUsedFields[i]);
4878 118 : if (iOGRFieldIndex >= 0)
4879 : {
4880 112 : const auto oIter = oMapFieldNameToArrowPath.find(aosUsedFields[i]);
4881 112 : if (oIter != oMapFieldNameToArrowPath.end())
4882 : {
4883 224 : UsedFieldsInfo info;
4884 112 : info.iOGRFieldIndex = iOGRFieldIndex;
4885 112 : info.anArrowPath = oIter->second;
4886 112 : aoUsedFieldsInfo.push_back(std::move(info));
4887 : }
4888 : else
4889 : {
// The error is reported but processing continues without that field.
4890 0 : CPLError(CE_Failure, CPLE_AppDefined,
4891 : "Cannot find %s in oMapFieldNameToArrowPath",
4892 : aosUsedFields[i]);
4893 : }
4894 : }
4895 6 : else if (EQUAL(aosUsedFields[i], "FID"))
4896 : {
4897 6 : bNeedsFID = true;
4898 : }
4899 : else
4900 : {
4901 0 : CPLDebug("OGR", "Cannot find used field %s", aosUsedFields[i]);
4902 : }
4903 : }
4904 :
4905 134 : const size_t nLength = abyValidityFromFilters.size();
4906 :
// nBaseSeqFID stays at -1 unless BASE_SEQUENTIAL_FID is provided.
4907 134 : GIntBig nBaseSeqFID = -1;
4908 268 : std::vector<int> anArrowPathToFIDColumn;
4909 134 : if (bNeedsFID)
4910 : {
4911 : // BASE_SEQUENTIAL_FID is set when there is no Arrow column for the FID
4912 : // and we assume sequential FID numbering
4913 : const char *pszBaseSeqFID =
4914 6 : CSLFetchNameValue(papszOptions, "BASE_SEQUENTIAL_FID");
4915 6 : if (pszBaseSeqFID)
4916 : {
4917 5 : nBaseSeqFID = CPLAtoGIntBig(pszBaseSeqFID);
4918 :
4919 : // Optimization for "FID = constant"
// NOTE(review): rows get FIDs nBaseSeqFID + iRow with iRow < nLength, so a
// constant equal to nBaseSeqFID + nLength cannot match either, but it is not
// short-circuited by the test below (harmless: rows are evaluated one by one).
// NOTE(review): unlike the other early "return 0" paths of this function,
// this one does not reset abyValidityFromFilters - presumably the caller
// treats a 0 return as "no row selected"; confirm.
4920 : swq_expr_node *poNode =
4921 5 : static_cast<swq_expr_node *>(poAttrQuery->GetSWQExpr());
4922 15 : if (poNode->eNodeType == SNT_OPERATION &&
4923 5 : poNode->nOperation == SWQ_EQ && poNode->nSubExprCount == 2 &&
4924 2 : poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
4925 2 : poNode->papoSubExpr[1]->eNodeType == SNT_CONSTANT &&
4926 2 : poNode->papoSubExpr[0]->field_index ==
4927 12 : poFeatureDefn->GetFieldCount() + SPF_FID &&
4928 2 : poNode->papoSubExpr[1]->field_type == SWQ_INTEGER64)
4929 : {
4930 2 : if (nBaseSeqFID + static_cast<int64_t>(nLength) <
4931 2 : poNode->papoSubExpr[1]->int_value ||
4932 2 : nBaseSeqFID > poNode->papoSubExpr[1]->int_value)
4933 : {
4934 0 : return 0;
4935 : }
4936 : }
4937 : }
4938 : else
4939 : {
// No sequential base: look for an Arrow column named after the FID column.
4940 : const char *pszFIDColumn =
4941 1 : const_cast<OGRLayer *>(poLayer)->GetFIDColumn();
4942 1 : if (pszFIDColumn && pszFIDColumn[0])
4943 : {
4944 1 : const auto oIter = oMapFieldNameToArrowPath.find(pszFIDColumn);
4945 1 : if (oIter != oMapFieldNameToArrowPath.end())
4946 : {
4947 1 : anArrowPathToFIDColumn = oIter->second;
4948 : }
4949 : }
4950 1 : if (anArrowPathToFIDColumn.empty())
4951 : {
// The error is reported but rows are still evaluated, without a FID set here.
4952 0 : CPLError(CE_Failure, CPLE_AppDefined,
4953 : "Filtering on FID requested but cannot associate a "
4954 : "FID with Arrow records");
4955 : }
4956 : }
4957 : }
4958 :
4959 555 : for (size_t iRow = 0; iRow < nLength; ++iRow)
4960 : {
// Rows already rejected by a previous filter are left untouched.
4961 421 : if (!abyValidityFromFilters[iRow])
4962 2 : continue;
4963 :
4964 419 : if (bNeedsFID)
4965 : {
4966 21 : if (nBaseSeqFID >= 0)
4967 : {
4968 11 : oFeature.SetFID(nBaseSeqFID + iRow);
4969 : }
4970 10 : else if (!anArrowPathToFIDColumn.empty())
4971 : {
4972 10 : oFeature.SetFID(OGRNullFID);
4973 :
// Walk down to the FID column, checking the validity of each intermediate
// (parent) array for this row.
4974 10 : const struct ArrowSchema *psSchemaField = schema;
4975 10 : const struct ArrowArray *psArray = array;
4976 10 : bool bSkip = false;
4977 20 : for (size_t i = 0; i < anArrowPathToFIDColumn.size(); ++i)
4978 : {
4979 10 : const int iChild = anArrowPathToFIDColumn[i];
4980 10 : if (i > 0)
4981 : {
4982 0 : const uint8_t *pabyValidity =
4983 0 : psArray->null_count == 0
4984 0 : ? nullptr
4985 : : static_cast<uint8_t *>(
4986 0 : const_cast<void *>(psArray->buffers[0]));
4987 0 : const size_t nOffsettedIndex =
4988 0 : static_cast<size_t>(iRow + psArray->offset);
4989 0 : if (pabyValidity &&
4990 0 : !TestBit(pabyValidity, nOffsettedIndex))
4991 : {
4992 0 : bSkip = true;
4993 0 : break;
4994 : }
4995 : }
4996 :
4997 10 : psSchemaField = psSchemaField->children[iChild];
4998 10 : psArray = psArray->children[iChild];
4999 : }
// NOTE(review): this "continue" targets the enclosing iRow loop, so the row
// is neither evaluated, nor cleared in abyValidityFromFilters, nor counted in
// nCountIntersecting - confirm this is intended.
5000 10 : if (bSkip)
5001 0 : continue;
5002 :
5003 10 : const char *format = psSchemaField->format;
5004 10 : const uint8_t *pabyValidity =
5005 10 : psArray->null_count == 0
5006 10 : ? nullptr
5007 : : static_cast<uint8_t *>(
5008 0 : const_cast<void *>(psArray->buffers[0]));
5009 10 : const size_t nOffsettedIndex =
5010 10 : static_cast<size_t>(iRow + psArray->offset);
// Only int32 and int64 FID columns are read; with any other format, or a
// null value, the FID stays at OGRNullFID.
5011 10 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5012 : {
5013 : // do nothing
5014 : }
5015 10 : else if (IsInt32(format))
5016 : {
5017 0 : oFeature.SetFID(static_cast<const int32_t *>(
5018 0 : psArray->buffers[1])[nOffsettedIndex]);
5019 : }
5020 10 : else if (IsInt64(format))
5021 : {
5022 10 : oFeature.SetFID(static_cast<const int64_t *>(
5023 10 : psArray->buffers[1])[nOffsettedIndex]);
5024 : }
5025 : }
5026 : }
5027 :
// Copy each field used by the query from its Arrow column into oFeature.
5028 725 : for (const auto &sInfo : aoUsedFieldsInfo)
5029 : {
5030 306 : const int iOGRFieldIndex = sInfo.iOGRFieldIndex;
5031 306 : const struct ArrowSchema *psSchemaField = schema;
5032 306 : const struct ArrowArray *psArray = array;
5033 306 : bool bSkip = false;
5034 612 : for (size_t i = 0; i < sInfo.anArrowPath.size(); ++i)
5035 : {
5036 306 : const int iChild = sInfo.anArrowPath[i];
5037 306 : if (i > 0)
5038 : {
// A null intermediate (parent) element makes the OGR field null.
5039 0 : const uint8_t *pabyValidity =
5040 0 : psArray->null_count == 0
5041 0 : ? nullptr
5042 : : static_cast<uint8_t *>(
5043 0 : const_cast<void *>(psArray->buffers[0]));
5044 0 : const size_t nOffsettedIndex =
5045 0 : static_cast<size_t>(iRow + psArray->offset);
5046 0 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5047 : {
5048 0 : bSkip = true;
5049 0 : oFeature.SetFieldNull(iOGRFieldIndex);
5050 0 : break;
5051 : }
5052 : }
5053 :
5054 306 : psSchemaField = psSchemaField->children[iChild];
5055 306 : psArray = psArray->children[iChild];
5056 : }
// Here "continue" moves on to the next used field of the same row.
5057 306 : if (bSkip)
5058 0 : continue;
5059 :
5060 306 : const char *format = psSchemaField->format;
5061 306 : const uint8_t *pabyValidity =
5062 306 : psArray->null_count == 0
5063 306 : ? nullptr
5064 : : static_cast<uint8_t *>(
5065 129 : const_cast<void *>(psArray->buffers[0]));
// Index into the column buffers: row index shifted by the column's offset.
5066 306 : const size_t nOffsettedIndex =
5067 306 : static_cast<size_t>(iRow + psArray->offset);
5068 306 : if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5069 : {
5070 38 : oFeature.SetFieldNull(iOGRFieldIndex);
5071 : }
5072 268 : else if (IsBoolean(format))
5073 : {
5074 78 : oFeature.SetField(
5075 : iOGRFieldIndex,
5076 78 : TestBit(static_cast<const uint8_t *>(psArray->buffers[1]),
5077 : nOffsettedIndex));
5078 : }
5079 190 : else if (IsInt8(format))
5080 : {
5081 8 : oFeature.SetField(iOGRFieldIndex,
5082 8 : static_cast<const int8_t *>(
5083 8 : psArray->buffers[1])[nOffsettedIndex]);
5084 : }
5085 182 : else if (IsUInt8(format))
5086 : {
5087 4 : oFeature.SetField(iOGRFieldIndex,
5088 4 : static_cast<const uint8_t *>(
5089 4 : psArray->buffers[1])[nOffsettedIndex]);
5090 : }
5091 178 : else if (IsInt16(format))
5092 : {
5093 16 : oFeature.SetField(iOGRFieldIndex,
5094 16 : static_cast<const int16_t *>(
5095 16 : psArray->buffers[1])[nOffsettedIndex]);
5096 : }
5097 162 : else if (IsUInt16(format))
5098 : {
5099 2 : oFeature.SetField(iOGRFieldIndex,
5100 2 : static_cast<const uint16_t *>(
5101 2 : psArray->buffers[1])[nOffsettedIndex]);
5102 : }
5103 160 : else if (IsInt32(format))
5104 : {
5105 10 : oFeature.SetField(iOGRFieldIndex,
5106 10 : static_cast<const int32_t *>(
5107 10 : psArray->buffers[1])[nOffsettedIndex]);
5108 : }
5109 150 : else if (IsUInt32(format))
5110 : {
5111 0 : oFeature.SetField(
5112 : iOGRFieldIndex,
5113 0 : static_cast<GIntBig>(static_cast<const uint32_t *>(
5114 0 : psArray->buffers[1])[nOffsettedIndex]));
5115 : }
5116 150 : else if (IsInt64(format))
5117 : {
5118 4 : oFeature.SetField(
5119 : iOGRFieldIndex,
5120 4 : static_cast<GIntBig>(static_cast<const int64_t *>(
5121 4 : psArray->buffers[1])[nOffsettedIndex]));
5122 : }
5123 146 : else if (IsUInt64(format))
5124 : {
// uint64 values are converted to double.
5125 4 : oFeature.SetField(
5126 : iOGRFieldIndex,
5127 4 : static_cast<double>(static_cast<const uint64_t *>(
5128 4 : psArray->buffers[1])[nOffsettedIndex]));
5129 : }
5130 142 : else if (IsFloat32(format))
5131 : {
5132 2 : oFeature.SetField(
5133 : iOGRFieldIndex,
5134 2 : static_cast<double>(static_cast<const float *>(
5135 2 : psArray->buffers[1])[nOffsettedIndex]));
5136 : }
5137 140 : else if (IsFloat64(format))
5138 : {
5139 26 : oFeature.SetField(iOGRFieldIndex,
5140 26 : static_cast<const double *>(
5141 26 : psArray->buffers[1])[nOffsettedIndex]);
5142 : }
5143 114 : else if (IsString(format))
5144 : {
// Strings are written straight into the raw field storage: a NUL-terminated
// copy is allocated, and the previous string is freed when the field was
// valid.
5145 18 : const auto nOffset = static_cast<const uint32_t *>(
5146 18 : psArray->buffers[1])[nOffsettedIndex];
5147 18 : const auto nNextOffset = static_cast<const uint32_t *>(
5148 18 : psArray->buffers[1])[nOffsettedIndex + 1];
5149 18 : const GByte *pabyData =
5150 18 : static_cast<const GByte *>(psArray->buffers[2]);
5151 18 : const uint32_t nSize = nNextOffset - nOffset;
5152 18 : CPLAssert(oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() ==
5153 : OFTString);
5154 18 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5155 18 : memcpy(pszStr, pabyData + nOffset, nSize);
5156 18 : pszStr[nSize] = 0;
5157 18 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5158 18 : if (IsValidField(psField))
5159 12 : CPLFree(psField->String);
5160 18 : psField->String = pszStr;
5161 : }
5162 96 : else if (IsLargeString(format))
5163 : {
// Same as the string branch, with 64-bit offsets (and no field type assertion).
5164 6 : const auto nOffset = static_cast<const uint64_t *>(
5165 6 : psArray->buffers[1])[nOffsettedIndex];
5166 6 : const auto nNextOffset = static_cast<const uint64_t *>(
5167 6 : psArray->buffers[1])[nOffsettedIndex + 1];
5168 6 : const GByte *pabyData =
5169 6 : static_cast<const GByte *>(psArray->buffers[2]);
5170 6 : const size_t nSize = static_cast<size_t>(nNextOffset - nOffset);
5171 6 : char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5172 6 : memcpy(pszStr, pabyData + static_cast<size_t>(nOffset), nSize);
5173 6 : pszStr[nSize] = 0;
5174 6 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5175 6 : if (IsValidField(psField))
5176 3 : CPLFree(psField->String);
5177 6 : psField->String = pszStr;
5178 : }
5179 90 : else if (IsBinary(format))
5180 : {
5181 5 : const auto nOffset = static_cast<const uint32_t *>(
5182 5 : psArray->buffers[1])[nOffsettedIndex];
5183 5 : const auto nNextOffset = static_cast<const uint32_t *>(
5184 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5185 5 : const GByte *pabyData =
5186 5 : static_cast<const GByte *>(psArray->buffers[2]);
5187 5 : const uint32_t nSize = nNextOffset - nOffset;
// The size is passed to SetField() as an int, hence the INT32_MAX limit.
// On overflow all rows are deselected and the function bails out.
5188 10 : if (nSize >
5189 5 : static_cast<size_t>(std::numeric_limits<int32_t>::max()))
5190 : {
5191 0 : abyValidityFromFilters.clear();
5192 0 : abyValidityFromFilters.resize(nLength);
5193 0 : CPLError(CE_Failure, CPLE_AppDefined,
5194 : "Unexpected error in PostFilterArrowArray(): too "
5195 : "large binary");
5196 0 : return 0;
5197 : }
5198 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5199 5 : pabyData + nOffset);
5200 : }
5201 85 : else if (IsLargeBinary(format))
5202 : {
5203 5 : const auto nOffset = static_cast<const uint64_t *>(
5204 5 : psArray->buffers[1])[nOffsettedIndex];
5205 5 : const auto nNextOffset = static_cast<const uint64_t *>(
5206 5 : psArray->buffers[1])[nOffsettedIndex + 1];
5207 5 : const GByte *pabyData =
5208 5 : static_cast<const GByte *>(psArray->buffers[2]);
5209 5 : const uint64_t nSize = nNextOffset - nOffset;
5210 5 : if (nSize >
5211 5 : static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
5212 : {
5213 0 : abyValidityFromFilters.clear();
5214 0 : abyValidityFromFilters.resize(nLength);
5215 0 : CPLError(CE_Failure, CPLE_AppDefined,
5216 : "Unexpected error in PostFilterArrowArray(): too "
5217 : "large binary");
5218 0 : return 0;
5219 : }
5220 5 : oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5221 5 : pabyData + nOffset);
5222 : }
// Every remaining format is delegated; a false return means the format is
// not handled at all, which deselects all rows and aborts.
5223 80 : else if (!SetFieldForOtherFormats(oFeature, iOGRFieldIndex,
5224 : nOffsettedIndex, psSchemaField,
5225 : psArray))
5226 : {
5227 0 : abyValidityFromFilters.clear();
5228 0 : abyValidityFromFilters.resize(nLength);
5229 0 : CPLError(
5230 : CE_Failure, CPLE_AppDefined,
5231 : "Unexpected error in PostFilterArrowArray(): unhandled "
5232 : "field format: %s",
5233 : format);
5234 0 : return 0;
5235 : }
5236 : }
// Keep the row only when the query matches the scratch feature.
5237 419 : if (poAttrQuery->Evaluate(&oFeature))
5238 : {
5239 215 : nCountIntersecting++;
5240 : }
5241 : else
5242 : {
5243 204 : abyValidityFromFilters[iRow] = false;
5244 : }
5245 : }
5246 134 : return nCountIntersecting;
5247 : }
5248 :
5249 : /************************************************************************/
5250 : /* OGRLayer::PostFilterArrowArray() */
5251 : /************************************************************************/
5252 :
5253 : /** Remove rows that aren't selected by the spatial or attribute filter.
5254 : *
5255 : * Assumes that CanPostFilterArrowArray() has been called and returned true.
5256 : */
5257 153 : void OGRLayer::PostFilterArrowArray(const struct ArrowSchema *schema,
5258 : struct ArrowArray *array,
5259 : CSLConstList papszOptions) const
5260 : {
5261 153 : if (!m_poFilterGeom && !m_poAttrQuery)
5262 43 : return;
5263 :
5264 153 : CPLAssert(schema->n_children == array->n_children);
5265 :
5266 153 : int64_t iGeomField = -1;
5267 153 : if (m_poFilterGeom)
5268 : {
5269 : const char *pszGeomFieldName =
5270 : const_cast<OGRLayer *>(this)
5271 21 : ->GetLayerDefn()
5272 21 : ->GetGeomFieldDefn(m_iGeomFieldFilter)
5273 21 : ->GetNameRef();
5274 837 : for (int64_t iField = 0; iField < schema->n_children; ++iField)
5275 : {
5276 837 : const auto fieldSchema = schema->children[iField];
5277 837 : if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
5278 : {
5279 21 : iGeomField = iField;
5280 21 : break;
5281 : }
5282 816 : CPLAssert(array->children[iField]->length ==
5283 : array->children[0]->length);
5284 : }
5285 : // Guaranteed if CanPostFilterArrowArray() returned true
5286 21 : CPLAssert(iGeomField >= 0);
5287 21 : CPLAssert(IsBinary(schema->children[iGeomField]->format) ||
5288 : IsLargeBinary(schema->children[iGeomField]->format));
5289 21 : CPLAssert(array->children[iGeomField]->n_buffers == 3);
5290 : }
5291 :
5292 153 : std::vector<bool> abyValidityFromFilters;
5293 153 : const size_t nLength = static_cast<size_t>(array->length);
5294 : const size_t nCountIntersectingGeom =
5295 174 : m_poFilterGeom ? (IsBinary(schema->children[iGeomField]->format)
5296 42 : ? FillValidityArrayFromWKBArray<uint32_t>(
5297 21 : array->children[iGeomField], this,
5298 : abyValidityFromFilters)
5299 0 : : FillValidityArrayFromWKBArray<uint64_t>(
5300 0 : array->children[iGeomField], this,
5301 : abyValidityFromFilters))
5302 153 : : nLength;
5303 153 : if (!m_poFilterGeom)
5304 132 : abyValidityFromFilters.resize(nLength, true);
5305 : const size_t nCountIntersecting =
5306 134 : m_poAttrQuery && nCountIntersectingGeom > 0
5307 306 : ? FillValidityArrayFromAttrQuery(this, m_poAttrQuery, schema, array,
5308 : abyValidityFromFilters,
5309 : papszOptions)
5310 19 : : m_poFilterGeom ? nCountIntersectingGeom
5311 153 : : nLength;
5312 : // Nothing to do ?
5313 153 : if (nCountIntersecting == nLength)
5314 : {
5315 : // CPLDebug("OGR", "All rows match filter");
5316 43 : return;
5317 : }
5318 :
5319 110 : if (nCountIntersecting == 0)
5320 : {
5321 27 : array->length = 0;
5322 : }
5323 83 : else if (!CompactStructArray(schema, array, 0, abyValidityFromFilters,
5324 : nCountIntersecting))
5325 : {
5326 0 : array->release(array);
5327 0 : memset(array, 0, sizeof(*array));
5328 : }
5329 : }
5330 :
5331 : /************************************************************************/
5332 : /* OGRCloneArrowArray */
5333 : /************************************************************************/
5334 :
5335 13947 : static bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5336 : const struct ArrowArray *src_array,
5337 : struct ArrowArray *out_array,
5338 : size_t nParentOffset)
5339 : {
5340 13947 : memset(out_array, 0, sizeof(*out_array));
5341 13947 : const size_t nLength =
5342 13947 : static_cast<size_t>(src_array->length) - nParentOffset;
5343 13947 : out_array->length = nLength;
5344 13947 : out_array->null_count = src_array->null_count;
5345 13947 : out_array->release = OGRLayerDefaultReleaseArray;
5346 :
5347 13947 : bool bRet = true;
5348 :
5349 13947 : out_array->n_buffers = src_array->n_buffers;
5350 27894 : out_array->buffers = static_cast<const void **>(CPLCalloc(
5351 13947 : static_cast<size_t>(src_array->n_buffers), sizeof(const void *)));
5352 13947 : CPLAssert(static_cast<size_t>(src_array->length) >= nParentOffset);
5353 13947 : const char *format = schema->format;
5354 13947 : const auto nOffset = static_cast<size_t>(src_array->offset) + nParentOffset;
5355 41479 : for (int64_t i = 0; i < src_array->n_buffers; ++i)
5356 : {
5357 27532 : if (i == 0 || IsBoolean(format))
5358 : {
5359 14318 : if (i == 1)
5360 : {
5361 371 : CPLAssert(src_array->buffers[i]);
5362 : }
5363 14318 : if (src_array->buffers[i])
5364 : {
5365 8765 : const size_t nBytes = nLength ? (nLength + 7) / 8 : 1;
5366 : uint8_t *CPL_RESTRICT p = static_cast<uint8_t *>(
5367 8765 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nBytes));
5368 8765 : if (!p)
5369 : {
5370 0 : bRet = false;
5371 0 : break;
5372 : }
5373 8765 : const auto *CPL_RESTRICT pSrcArray =
5374 8765 : static_cast<const uint8_t *>(src_array->buffers[i]);
5375 8765 : if ((nOffset % 8) != 0)
5376 : {
5377 : // Make sure last byte is fully initialized
5378 2281 : p[nBytes - 1] = 0;
5379 7359 : for (size_t iRow = 0; iRow < nLength; ++iRow)
5380 : {
5381 5078 : if (TestBit(pSrcArray, nOffset + iRow))
5382 4949 : SetBit(p, iRow);
5383 : else
5384 129 : UnsetBit(p, iRow);
5385 : }
5386 : }
5387 : else
5388 : {
5389 6484 : memcpy(p, pSrcArray + nOffset / 8, nBytes);
5390 : }
5391 8765 : out_array->buffers[i] = p;
5392 : }
5393 : }
5394 13214 : else if (i == 1)
5395 : {
5396 11083 : CPLAssert(src_array->buffers[i]);
5397 11083 : size_t nEltSize = 0;
5398 11083 : size_t nExtraElt = 0;
5399 11083 : if (IsUInt8(format) || IsInt8(format))
5400 742 : nEltSize = sizeof(uint8_t);
5401 10341 : else if (IsUInt16(format) || IsInt16(format) || IsFloat16(format))
5402 762 : nEltSize = sizeof(uint16_t);
5403 19138 : else if (IsUInt32(format) || IsInt32(format) || IsFloat32(format) ||
5404 27618 : strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
5405 8480 : strcmp(format, "ttm") == 0)
5406 : {
5407 1316 : nEltSize = sizeof(uint32_t);
5408 : }
5409 12719 : else if (IsString(format) || IsBinary(format) || IsList(format) ||
5410 4456 : IsMap(format))
5411 : {
5412 4496 : nEltSize = sizeof(uint32_t);
5413 4496 : nExtraElt = 1;
5414 : }
5415 7163 : else if (IsUInt64(format) || IsInt64(format) || IsFloat64(format) ||
5416 1648 : strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
5417 7163 : strcmp(format, "ttn") == 0 || IsTimestamp(format))
5418 : {
5419 2939 : nEltSize = sizeof(uint64_t);
5420 : }
5421 1318 : else if (IsLargeString(format) || IsLargeBinary(format) ||
5422 490 : IsLargeList(format))
5423 : {
5424 343 : nEltSize = sizeof(uint64_t);
5425 343 : nExtraElt = 1;
5426 : }
5427 485 : else if (IsFixedWidthBinary(format))
5428 : {
5429 111 : nEltSize = GetFixedWithBinary(format);
5430 : }
5431 374 : else if (IsDecimal(format))
5432 : {
5433 374 : int nPrecision = 0;
5434 374 : int nScale = 0;
5435 374 : int nWidthInBytes = 0;
5436 374 : if (!ParseDecimalFormat(format, nPrecision, nScale,
5437 : nWidthInBytes))
5438 : {
5439 0 : CPLError(
5440 : CE_Failure, CPLE_AppDefined,
5441 : "Unexpected error in OGRCloneArrowArray(): unhandled "
5442 : "field format: %s",
5443 : format);
5444 :
5445 0 : return false;
5446 : }
5447 374 : nEltSize = nWidthInBytes;
5448 : }
5449 11083 : if (nEltSize)
5450 : {
5451 11083 : void *p = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
5452 : nLength ? nEltSize * (nLength + nExtraElt) : 1);
5453 11083 : if (!p)
5454 : {
5455 0 : bRet = false;
5456 0 : break;
5457 : }
5458 11083 : if (nLength)
5459 : {
5460 12876 : if ((IsString(format) || IsBinary(format)) &&
5461 1793 : static_cast<const uint32_t *>(
5462 1793 : src_array->buffers[1])[nOffset] != 0)
5463 : {
5464 258 : const auto *CPL_RESTRICT pSrcOffsets =
5465 258 : static_cast<const uint32_t *>(
5466 258 : src_array->buffers[1]) +
5467 : nOffset;
5468 258 : const auto nShiftOffset = pSrcOffsets[0];
5469 258 : auto *CPL_RESTRICT pDstOffsets =
5470 : static_cast<uint32_t *>(p);
5471 1118 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5472 : {
5473 860 : pDstOffsets[iRow] =
5474 860 : pSrcOffsets[iRow] - nShiftOffset;
5475 : }
5476 : }
5477 11163 : else if ((IsLargeString(format) || IsLargeBinary(format)) &&
5478 338 : static_cast<const uint64_t *>(
5479 338 : src_array->buffers[1])[nOffset] != 0)
5480 : {
5481 86 : const auto *CPL_RESTRICT pSrcOffsets =
5482 86 : static_cast<const uint64_t *>(
5483 86 : src_array->buffers[1]) +
5484 : nOffset;
5485 86 : const auto nShiftOffset = pSrcOffsets[0];
5486 86 : auto *CPL_RESTRICT pDstOffsets =
5487 : static_cast<uint64_t *>(p);
5488 344 : for (size_t iRow = 0; iRow <= nLength; ++iRow)
5489 : {
5490 258 : pDstOffsets[iRow] =
5491 258 : pSrcOffsets[iRow] - nShiftOffset;
5492 : }
5493 : }
5494 : else
5495 : {
5496 10739 : memcpy(
5497 : p,
5498 10739 : static_cast<const GByte *>(src_array->buffers[i]) +
5499 10739 : nEltSize * nOffset,
5500 10739 : nEltSize * (nLength + nExtraElt));
5501 : }
5502 : }
5503 11083 : out_array->buffers[i] = p;
5504 : }
5505 : else
5506 : {
5507 0 : CPLError(CE_Failure, CPLE_AppDefined,
5508 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5509 : "format = '%s', i = 1",
5510 0 : schema->name, format);
5511 0 : bRet = false;
5512 0 : break;
5513 : }
5514 : }
5515 2131 : else if (i == 2)
5516 : {
5517 2131 : CPLAssert(src_array->buffers[i]);
5518 2131 : size_t nSrcCharOffset = 0;
5519 2131 : size_t nCharCount = 0;
5520 2131 : if (IsString(format) || IsBinary(format))
5521 : {
5522 1793 : const auto *pSrcOffsets =
5523 1793 : static_cast<const uint32_t *>(src_array->buffers[1]) +
5524 : nOffset;
5525 1793 : nSrcCharOffset = pSrcOffsets[0];
5526 1793 : nCharCount = pSrcOffsets[nLength] - pSrcOffsets[0];
5527 : }
5528 338 : else if (IsLargeString(format) || IsLargeBinary(format))
5529 : {
5530 338 : const auto *pSrcOffsets =
5531 338 : static_cast<const uint64_t *>(src_array->buffers[1]) +
5532 : nOffset;
5533 338 : nSrcCharOffset = static_cast<size_t>(pSrcOffsets[0]);
5534 338 : nCharCount =
5535 338 : static_cast<size_t>(pSrcOffsets[nLength] - pSrcOffsets[0]);
5536 : }
5537 : else
5538 : {
5539 0 : CPLError(CE_Failure, CPLE_AppDefined,
5540 : "OGRCloneArrowArray(): unhandled case, array = %s, "
5541 : "format = '%s', i = 2",
5542 0 : schema->name, format);
5543 0 : bRet = false;
5544 0 : break;
5545 : }
5546 : void *p =
5547 2131 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCharCount ? nCharCount : 1);
5548 2131 : if (!p)
5549 : {
5550 0 : bRet = false;
5551 0 : break;
5552 : }
5553 2131 : if (nCharCount)
5554 : {
5555 2131 : memcpy(p,
5556 2131 : static_cast<const GByte *>(src_array->buffers[i]) +
5557 : nSrcCharOffset,
5558 : nCharCount);
5559 : }
5560 2131 : out_array->buffers[i] = p;
5561 : }
5562 : else
5563 : {
5564 0 : CPLError(CE_Failure, CPLE_AppDefined,
5565 : "OGRCloneArrowArray(): unhandled case, array = %s, format "
5566 : "= '%s', i = 3",
5567 0 : schema->name, format);
5568 0 : bRet = false;
5569 0 : break;
5570 : }
5571 : }
5572 :
5573 13947 : if (bRet)
5574 : {
5575 13947 : out_array->n_children = src_array->n_children;
5576 13947 : out_array->children = static_cast<struct ArrowArray **>(
5577 13947 : CPLCalloc(static_cast<size_t>(src_array->n_children),
5578 : sizeof(struct ArrowArray *)));
5579 27653 : for (int64_t i = 0; i < src_array->n_children; ++i)
5580 : {
5581 27412 : out_array->children[i] = static_cast<struct ArrowArray *>(
5582 13706 : CPLCalloc(1, sizeof(struct ArrowArray)));
5583 39777 : if (!OGRCloneArrowArray(schema->children[i], src_array->children[i],
5584 13706 : out_array->children[i],
5585 13706 : IsFixedSizeList(format)
5586 1341 : ? nOffset * GetFixedSizeList(format)
5587 12365 : : IsStructure(format) ? nOffset
5588 : : 0))
5589 : {
5590 0 : bRet = false;
5591 0 : break;
5592 : }
5593 : }
5594 : }
5595 :
5596 13947 : if (bRet && src_array->dictionary)
5597 : {
5598 111 : out_array->dictionary = static_cast<struct ArrowArray *>(
5599 111 : CPLCalloc(1, sizeof(struct ArrowArray)));
5600 111 : bRet = OGRCloneArrowArray(schema->dictionary, src_array->dictionary,
5601 : out_array->dictionary, 0);
5602 : }
5603 :
5604 13947 : if (!bRet)
5605 : {
5606 0 : out_array->release(out_array);
5607 0 : memset(out_array, 0, sizeof(*out_array));
5608 : }
5609 13947 : return bRet;
5610 : }
5611 :
5612 : /** Full/deep copy of an array.
5613 : *
5614 : * Renormalize the offset of the array (and its children) to 0.
5615 : *
5616 : * In case of failure, out_array will be let in a released state.
5617 : *
5618 : * @param schema Schema of the array. Must *NOT* be NULL.
5619 : * @param src_array Source array. Must *NOT* be NULL.
5620 : * @param out_array Output array. Must *NOT* be NULL (but its content may be random)
5621 : * @return true if success.
5622 : */
5623 130 : bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5624 : const struct ArrowArray *src_array,
5625 : struct ArrowArray *out_array)
5626 : {
5627 130 : return OGRCloneArrowArray(schema, src_array, out_array, 0);
5628 : }
5629 :
5630 : /************************************************************************/
5631 : /* OGRCloneArrowMetadata() */
5632 : /************************************************************************/
5633 :
5634 23 : static void *OGRCloneArrowMetadata(const void *pMetadata)
5635 : {
5636 23 : if (!pMetadata)
5637 19 : return nullptr;
5638 4 : std::vector<GByte> abyOut;
5639 4 : const GByte *pabyMetadata = static_cast<const GByte *>(pMetadata);
5640 : int32_t nKVP;
5641 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + sizeof(int32_t));
5642 4 : memcpy(&nKVP, pabyMetadata, sizeof(int32_t));
5643 4 : pabyMetadata += sizeof(int32_t);
5644 8 : for (int i = 0; i < nKVP; ++i)
5645 : {
5646 : int32_t nSizeKey;
5647 0 : abyOut.insert(abyOut.end(), pabyMetadata,
5648 4 : pabyMetadata + sizeof(int32_t));
5649 4 : memcpy(&nSizeKey, pabyMetadata, sizeof(int32_t));
5650 4 : pabyMetadata += sizeof(int32_t);
5651 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeKey);
5652 4 : pabyMetadata += nSizeKey;
5653 :
5654 : int32_t nSizeValue;
5655 0 : abyOut.insert(abyOut.end(), pabyMetadata,
5656 4 : pabyMetadata + sizeof(int32_t));
5657 4 : memcpy(&nSizeValue, pabyMetadata, sizeof(int32_t));
5658 4 : pabyMetadata += sizeof(int32_t);
5659 4 : abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeValue);
5660 4 : pabyMetadata += nSizeValue;
5661 : }
5662 :
5663 4 : GByte *pabyOut = static_cast<GByte *>(VSI_MALLOC_VERBOSE(abyOut.size()));
5664 4 : if (pabyOut)
5665 4 : memcpy(pabyOut, abyOut.data(), abyOut.size());
5666 4 : return pabyOut;
5667 : }
5668 :
5669 : /************************************************************************/
5670 : /* OGRCloneArrowSchema() */
5671 : /************************************************************************/
5672 :
5673 : /** Full/deep copy of a schema.
5674 : *
5675 : * In case of failure, out_schema will be let in a released state.
5676 : *
5677 : * @param schema Schema to clone. Must *NOT* be NULL.
5678 : * @param out_schema Output schema. Must *NOT* be NULL (but its content may be random)
5679 : * @return true if success.
5680 : */
5681 23 : bool OGRCloneArrowSchema(const struct ArrowSchema *schema,
5682 : struct ArrowSchema *out_schema)
5683 : {
5684 23 : memset(out_schema, 0, sizeof(*out_schema));
5685 23 : out_schema->release = OGRLayerFullReleaseSchema;
5686 23 : out_schema->format = CPLStrdup(schema->format);
5687 23 : out_schema->name = CPLStrdup(schema->name);
5688 23 : out_schema->metadata = static_cast<const char *>(
5689 23 : const_cast<const void *>(OGRCloneArrowMetadata(schema->metadata)));
5690 23 : out_schema->flags = schema->flags;
5691 23 : if (schema->n_children)
5692 : {
5693 5 : out_schema->children =
5694 5 : static_cast<struct ArrowSchema **>(VSI_CALLOC_VERBOSE(
5695 : static_cast<int>(schema->n_children), sizeof(ArrowSchema *)));
5696 5 : if (!out_schema->children)
5697 : {
5698 0 : out_schema->release(out_schema);
5699 0 : return false;
5700 : }
5701 5 : out_schema->n_children = schema->n_children;
5702 23 : for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
5703 : {
5704 36 : out_schema->children[i] = static_cast<struct ArrowSchema *>(
5705 18 : CPLMalloc(sizeof(ArrowSchema)));
5706 18 : if (!OGRCloneArrowSchema(schema->children[i],
5707 18 : out_schema->children[i]))
5708 : {
5709 0 : out_schema->release(out_schema);
5710 0 : return false;
5711 : }
5712 : }
5713 : }
5714 23 : if (schema->dictionary)
5715 : {
5716 0 : out_schema->dictionary =
5717 0 : static_cast<struct ArrowSchema *>(CPLMalloc(sizeof(ArrowSchema)));
5718 0 : if (!OGRCloneArrowSchema(schema->dictionary, out_schema->dictionary))
5719 : {
5720 0 : out_schema->release(out_schema);
5721 0 : return false;
5722 : }
5723 : }
5724 23 : return true;
5725 : }
5726 :
5727 : /************************************************************************/
5728 : /* OGRLayer::IsArrowSchemaSupported() */
5729 : /************************************************************************/
5730 :
5731 : const struct
5732 : {
5733 : const char *arrowType;
5734 : OGRFieldType eType;
5735 : OGRFieldSubType eSubType;
5736 : } gasArrowTypesToOGR[] = {
5737 : {"b", OFTInteger, OFSTBoolean}, {"c", OFTInteger, OFSTInt16}, // Int8
5738 : {"C", OFTInteger, OFSTInt16}, // UInt8
5739 : {"s", OFTInteger, OFSTInt16}, // Int16
5740 : {"S", OFTInteger, OFSTNone}, // UInt16
5741 : {"i", OFTInteger, OFSTNone}, // Int32
5742 : {"I", OFTInteger64, OFSTNone}, // UInt32
5743 : {"l", OFTInteger64, OFSTNone}, // Int64
5744 : {"L", OFTReal, OFSTNone}, // UInt64 (potentially lossy conversion if going through OGRFeature)
5745 : {"e", OFTReal, OFSTFloat32}, // float16
5746 : {"f", OFTReal, OFSTFloat32}, // float32
5747 : {"g", OFTReal, OFSTNone}, // float64
5748 : {"z", OFTBinary, OFSTNone}, // binary
5749 : {"Z", OFTBinary, OFSTNone}, // large binary (will be limited to 32 bit length though if going through OGRFeature!)
5750 : {"u", OFTString, OFSTNone}, // string
5751 : {"U", OFTString, OFSTNone}, // large string
5752 : {"tdD", OFTDate, OFSTNone}, // date32[days]
5753 : {"tdm", OFTDate, OFSTNone}, // date64[milliseconds]
5754 : {"tts", OFTTime, OFSTNone}, // time32 [seconds]
5755 : {"ttm", OFTTime, OFSTNone}, // time32 [milliseconds]
5756 : {"ttu", OFTTime, OFSTNone}, // time64 [microseconds]
5757 : {"ttn", OFTTime, OFSTNone}, // time64 [nanoseconds]
5758 : };
5759 :
5760 : const struct
5761 : {
5762 : const char arrowLetter;
5763 : OGRFieldType eType;
5764 : OGRFieldSubType eSubType;
5765 : } gasListTypes[] = {
5766 : {ARROW_LETTER_BOOLEAN, OFTIntegerList, OFSTBoolean},
5767 : {ARROW_LETTER_INT8, OFTIntegerList, OFSTInt16},
5768 : {ARROW_LETTER_UINT8, OFTIntegerList, OFSTInt16},
5769 : {ARROW_LETTER_INT16, OFTIntegerList, OFSTInt16},
5770 : {ARROW_LETTER_UINT16, OFTIntegerList, OFSTNone},
5771 : {ARROW_LETTER_INT32, OFTIntegerList, OFSTNone},
5772 : {ARROW_LETTER_UINT32, OFTInteger64List, OFSTNone},
5773 : {ARROW_LETTER_INT64, OFTInteger64List, OFSTNone},
5774 : {ARROW_LETTER_UINT64, OFTRealList,
5775 : OFSTNone}, //(potentially lossy conversion if going through OGRFeature)
5776 : {ARROW_LETTER_FLOAT16, OFTRealList, OFSTFloat32},
5777 : {ARROW_LETTER_FLOAT32, OFTRealList, OFSTFloat32},
5778 : {ARROW_LETTER_FLOAT64, OFTRealList, OFSTNone},
5779 : {ARROW_LETTER_STRING, OFTStringList, OFSTNone},
5780 : {ARROW_LETTER_LARGE_STRING, OFTStringList, OFSTNone},
5781 : };
5782 :
5783 43 : static inline bool IsValidDictionaryIndexType(const char *format)
5784 : {
5785 40 : return (format[0] == ARROW_LETTER_INT8 || format[0] == ARROW_LETTER_UINT8 ||
5786 37 : format[0] == ARROW_LETTER_INT16 ||
5787 34 : format[0] == ARROW_LETTER_UINT16 ||
5788 31 : format[0] == ARROW_LETTER_INT32 ||
5789 9 : format[0] == ARROW_LETTER_UINT32 ||
5790 6 : format[0] == ARROW_LETTER_INT64 ||
5791 89 : format[0] == ARROW_LETTER_UINT64) &&
5792 86 : format[1] == 0;
5793 : }
5794 :
5795 230 : static bool IsSupportForJSONObj(const struct ArrowSchema *schema)
5796 : {
5797 230 : const char *format = schema->format;
5798 230 : if (IsStructure(format))
5799 : {
5800 35 : for (int64_t i = 0; i < schema->n_children; ++i)
5801 : {
5802 26 : if (!IsSupportForJSONObj(schema->children[i]))
5803 0 : return false;
5804 : }
5805 9 : return true;
5806 : }
5807 :
5808 2752 : for (const auto &sType : gasListTypes)
5809 : {
5810 2626 : if (format[0] == sType.arrowLetter && format[1] == 0)
5811 : {
5812 95 : return true;
5813 : }
5814 : }
5815 :
5816 126 : if (IsBinary(format) || IsLargeBinary(format) || IsFixedWidthBinary(format))
5817 12 : return true;
5818 :
5819 114 : if (IsDecimal(format))
5820 : {
5821 6 : int nPrecision = 0;
5822 6 : int nScale = 0;
5823 6 : int nWidthInBytes = 0;
5824 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
5825 : {
5826 0 : CPLError(CE_Failure, CPLE_AppDefined, "Invalid field format %s",
5827 : format);
5828 0 : return false;
5829 : }
5830 :
5831 6 : return GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision) ==
5832 6 : nullptr;
5833 : }
5834 :
5835 108 : if (IsMap(format))
5836 : {
5837 74 : return IsStructure(schema->children[0]->format) &&
5838 148 : schema->children[0]->n_children == 2 &&
5839 222 : IsString(schema->children[0]->children[0]->format) &&
5840 148 : IsSupportForJSONObj(schema->children[0]->children[1]);
5841 : }
5842 :
5843 34 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
5844 : {
5845 34 : return IsSupportForJSONObj(schema->children[0]);
5846 : }
5847 :
5848 0 : return false;
5849 : }
5850 :
5851 536 : static bool IsArrowSchemaSupportedInternal(const struct ArrowSchema *schema,
5852 : const std::string &osFieldPrefix,
5853 : std::string &osErrorMsg)
5854 : {
5855 0 : const auto AppendError = [&osErrorMsg](const std::string &osMsg)
5856 : {
5857 0 : if (!osErrorMsg.empty())
5858 0 : osErrorMsg += " ";
5859 0 : osErrorMsg += osMsg;
5860 536 : };
5861 :
5862 536 : const char *fieldName = schema->name;
5863 536 : const char *format = schema->format;
5864 536 : if (IsStructure(format))
5865 : {
5866 5 : bool bRet = true;
5867 5 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
5868 21 : for (int64_t i = 0; i < schema->n_children; ++i)
5869 : {
5870 16 : if (!IsArrowSchemaSupportedInternal(schema->children[i],
5871 : osNewPrefix, osErrorMsg))
5872 0 : bRet = false;
5873 : }
5874 5 : return bRet;
5875 : }
5876 :
5877 531 : if (schema->dictionary)
5878 : {
5879 15 : if (!IsValidDictionaryIndexType(format))
5880 : {
5881 0 : AppendError("Dictionary only supported if the parent is of "
5882 : "type [U]Int[8|16|32|64]");
5883 0 : return false;
5884 : }
5885 :
5886 15 : schema = schema->dictionary;
5887 15 : format = schema->format;
5888 : }
5889 :
5890 531 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
5891 : {
5892 : // Only some subtypes supported
5893 132 : const char *childFormat = schema->children[0]->format;
5894 1103 : for (const auto &sType : gasListTypes)
5895 : {
5896 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
5897 : {
5898 117 : return true;
5899 : }
5900 : }
5901 :
5902 15 : if (IsDecimal(childFormat))
5903 : {
5904 7 : int nPrecision = 0;
5905 7 : int nScale = 0;
5906 7 : int nWidthInBytes = 0;
5907 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
5908 : nWidthInBytes))
5909 : {
5910 0 : AppendError(std::string("Invalid field format ") + childFormat +
5911 0 : " for field " + osFieldPrefix + fieldName);
5912 0 : return false;
5913 : }
5914 :
5915 : const char *pszError =
5916 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
5917 7 : if (pszError)
5918 : {
5919 0 : AppendError(pszError);
5920 0 : return false;
5921 : }
5922 :
5923 7 : return true;
5924 : }
5925 :
5926 8 : if (IsSupportForJSONObj(schema))
5927 : {
5928 8 : return true;
5929 : }
5930 :
5931 0 : AppendError("Type list for field " + osFieldPrefix + fieldName +
5932 : " is not supported.");
5933 0 : return false;
5934 : }
5935 :
5936 399 : else if (IsMap(format))
5937 : {
5938 70 : if (IsSupportForJSONObj(schema))
5939 70 : return true;
5940 :
5941 0 : AppendError("Type map for field " + osFieldPrefix + fieldName +
5942 : " is not supported.");
5943 0 : return false;
5944 : }
5945 329 : else if (IsDecimal(format))
5946 : {
5947 6 : int nPrecision = 0;
5948 6 : int nScale = 0;
5949 6 : int nWidthInBytes = 0;
5950 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
5951 : {
5952 0 : AppendError(std::string("Invalid field format ") + format +
5953 0 : " for field " + osFieldPrefix + fieldName);
5954 0 : return false;
5955 : }
5956 :
5957 : const char *pszError =
5958 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
5959 6 : if (pszError)
5960 : {
5961 0 : AppendError(pszError);
5962 0 : return false;
5963 : }
5964 :
5965 6 : return true;
5966 : }
5967 : else
5968 : {
5969 4137 : for (const auto &sType : gasArrowTypesToOGR)
5970 : {
5971 4117 : if (strcmp(format, sType.arrowType) == 0)
5972 : {
5973 303 : return true;
5974 : }
5975 : }
5976 :
5977 20 : if (IsFixedWidthBinary(format) || IsTimestamp(format))
5978 20 : return true;
5979 :
5980 0 : AppendError("Type '" + std::string(format) + "' for field " +
5981 0 : osFieldPrefix + fieldName + " is not supported.");
5982 0 : return false;
5983 : }
5984 : }
5985 :
5986 : /** Returns whether the provided ArrowSchema is supported for writing.
5987 : *
5988 : * This method exists since not all drivers may support all Arrow data types.
5989 : *
5990 : * The ArrowSchema must be of type struct (format=+s)
5991 : *
5992 : * It is recommended to call this method before calling WriteArrowBatch().
5993 : *
5994 : * This is the same as the C function OGR_L_IsArrowSchemaSupported().
5995 : *
5996 : * @param schema Schema of type struct (format = '+s')
5997 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
5998 : * @param[out] osErrorMsg Reason of the failure, when this method returns false.
5999 : * @return true if the ArrowSchema is supported for writing.
6000 : * @since 3.8
6001 : */
6002 55 : bool OGRLayer::IsArrowSchemaSupported(const struct ArrowSchema *schema,
6003 : CPL_UNUSED CSLConstList papszOptions,
6004 : std::string &osErrorMsg) const
6005 : {
6006 55 : if (!IsStructure(schema->format))
6007 : {
6008 : osErrorMsg =
6009 : "IsArrowSchemaSupported() should be called on a schema that is a "
6010 1 : "struct of fields";
6011 1 : return false;
6012 : }
6013 :
6014 54 : bool bRet = true;
6015 574 : for (int64_t i = 0; i < schema->n_children; ++i)
6016 : {
6017 520 : if (!IsArrowSchemaSupportedInternal(schema->children[i], std::string(),
6018 : osErrorMsg))
6019 0 : bRet = false;
6020 : }
6021 54 : return bRet;
6022 : }
6023 :
6024 : /************************************************************************/
6025 : /* OGR_L_IsArrowSchemaSupported() */
6026 : /************************************************************************/
6027 :
6028 : /** Returns whether the provided ArrowSchema is supported for writing.
6029 : *
6030 : * This function exists since not all drivers may support all Arrow data types.
6031 : *
6032 : * The ArrowSchema must be of type struct (format=+s)
6033 : *
6034 : * It is recommended to call this function before calling OGR_L_WriteArrowBatch().
6035 : *
6036 : * This is the same as the C++ method OGRLayer::IsArrowSchemaSupported().
6037 : *
6038 : * @param hLayer Layer.
6039 : * @param schema Schema of type struct (format = '+s')
6040 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6041 : * @param[out] ppszErrorMsg nullptr, or pointer to a string that will contain
6042 : * the reason of the failure, when this function returns false.
6043 : * @return true if the ArrowSchema is supported for writing.
6044 : * @since 3.8
6045 : */
6046 19 : bool OGR_L_IsArrowSchemaSupported(OGRLayerH hLayer,
6047 : const struct ArrowSchema *schema,
6048 : char **papszOptions, char **ppszErrorMsg)
6049 : {
6050 19 : VALIDATE_POINTER1(hLayer, __func__, false);
6051 19 : VALIDATE_POINTER1(schema, __func__, false);
6052 :
6053 38 : std::string osErrorMsg;
6054 38 : if (!OGRLayer::FromHandle(hLayer)->IsArrowSchemaSupported(
6055 19 : schema, papszOptions, osErrorMsg))
6056 : {
6057 4 : if (ppszErrorMsg)
6058 4 : *ppszErrorMsg = VSIStrdup(osErrorMsg.c_str());
6059 4 : return false;
6060 : }
6061 : else
6062 : {
6063 15 : if (ppszErrorMsg)
6064 15 : *ppszErrorMsg = nullptr;
6065 15 : return true;
6066 : }
6067 : }
6068 :
6069 : /************************************************************************/
6070 : /* IsKnownCodedFieldDomain() */
6071 : /************************************************************************/
6072 :
6073 34 : static bool IsKnownCodedFieldDomain(OGRLayer *poLayer,
6074 : const char *arrowMetadata)
6075 : {
6076 34 : if (arrowMetadata)
6077 : {
6078 6 : const auto oMetadata = OGRParseArrowMetadata(arrowMetadata);
6079 6 : for (const auto &oIter : oMetadata)
6080 : {
6081 6 : if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6082 : {
6083 6 : auto poDS = poLayer->GetDataset();
6084 6 : if (poDS)
6085 : {
6086 : const auto poFieldDomain =
6087 6 : poDS->GetFieldDomain(oIter.second);
6088 12 : if (poFieldDomain &&
6089 6 : poFieldDomain->GetDomainType() == OFDT_CODED)
6090 : {
6091 6 : return true;
6092 : }
6093 : }
6094 : }
6095 : }
6096 : }
6097 28 : return false;
6098 : }
6099 :
6100 : /************************************************************************/
6101 : /* OGRLayer::CreateFieldFromArrowSchema() */
6102 : /************************************************************************/
6103 :
6104 : //! @cond Doxygen_Suppress
6105 469 : bool OGRLayer::CreateFieldFromArrowSchemaInternal(
6106 : const struct ArrowSchema *schema, const std::string &osFieldPrefix,
6107 : CSLConstList papszOptions)
6108 : {
6109 469 : const char *fieldName = schema->name;
6110 469 : const char *format = schema->format;
6111 469 : if (IsStructure(format))
6112 : {
6113 10 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6114 21 : for (int64_t i = 0; i < schema->n_children; ++i)
6115 : {
6116 16 : if (!CreateFieldFromArrowSchemaInternal(schema->children[i],
6117 : osNewPrefix, papszOptions))
6118 0 : return false;
6119 : }
6120 5 : return true;
6121 : }
6122 :
6123 928 : CPLStringList aosNativeTypes;
6124 464 : auto poLayer = const_cast<OGRLayer *>(this);
6125 464 : auto poDS = poLayer->GetDataset();
6126 464 : if (poDS)
6127 : {
6128 464 : auto poDriver = poDS->GetDriver();
6129 464 : if (poDriver)
6130 : {
6131 : const char *pszMetadataItem =
6132 464 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
6133 464 : if (pszMetadataItem)
6134 464 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
6135 : }
6136 : }
6137 :
6138 479 : if (schema->dictionary &&
6139 15 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6140 : {
6141 13 : if (!IsValidDictionaryIndexType(format))
6142 : {
6143 0 : CPLError(CE_Failure, CPLE_NotSupported,
6144 : "Dictionary only supported if the parent is of "
6145 : "type [U]Int[8|16|32|64]");
6146 0 : return false;
6147 : }
6148 :
6149 13 : schema = schema->dictionary;
6150 13 : format = schema->format;
6151 : }
6152 :
6153 464 : const auto AddField = [this, schema, fieldName, &aosNativeTypes,
6154 : &osFieldPrefix, poDS](OGRFieldType eTypeIn,
6155 : OGRFieldSubType eSubTypeIn,
6156 3282 : int nWidth, int nPrecision)
6157 : {
6158 464 : const char *pszTypeName = OGRFieldDefn::GetFieldTypeName(eTypeIn);
6159 464 : auto eTypeOut = eTypeIn;
6160 464 : auto eSubTypeOut = eSubTypeIn;
6161 928 : if (!aosNativeTypes.empty() &&
6162 464 : aosNativeTypes.FindString(pszTypeName) < 0)
6163 : {
6164 20 : eTypeOut = OFTString;
6165 20 : eSubTypeOut =
6166 15 : (eTypeIn == OFTIntegerList || eTypeIn == OFTInteger64List ||
6167 8 : eTypeIn == OFTRealList || eTypeIn == OFTStringList)
6168 35 : ? OFSTJSON
6169 : : OFSTNone;
6170 : }
6171 :
6172 928 : const std::string osWantedOGRFieldName = osFieldPrefix + fieldName;
6173 928 : OGRFieldDefn oFieldDefn(osWantedOGRFieldName.c_str(), eTypeOut);
6174 464 : oFieldDefn.SetSubType(eSubTypeOut);
6175 464 : if (eTypeOut == eTypeIn && eSubTypeOut == eSubTypeIn)
6176 : {
6177 444 : oFieldDefn.SetWidth(nWidth);
6178 444 : oFieldDefn.SetPrecision(nPrecision);
6179 : }
6180 464 : oFieldDefn.SetNullable((schema->flags & ARROW_FLAG_NULLABLE) != 0);
6181 :
6182 464 : if (schema->metadata)
6183 : {
6184 58 : const auto oMetadata = OGRParseArrowMetadata(schema->metadata);
6185 59 : for (const auto &oIter : oMetadata)
6186 : {
6187 30 : if (oIter.first == MD_GDAL_OGR_TYPE)
6188 : {
6189 3 : const auto &osType = oIter.second;
6190 36 : for (auto eType = OFTInteger; eType <= OFTMaxType;)
6191 : {
6192 36 : if (OGRFieldDefn::GetFieldTypeName(eType) == osType)
6193 : {
6194 3 : oFieldDefn.SetType(eType);
6195 3 : break;
6196 : }
6197 33 : if (eType == OFTMaxType)
6198 0 : break;
6199 : else
6200 33 : eType = static_cast<OGRFieldType>(eType + 1);
6201 : }
6202 : }
6203 27 : else if (oIter.first == MD_GDAL_OGR_ALTERNATIVE_NAME)
6204 2 : oFieldDefn.SetAlternativeName(oIter.second.c_str());
6205 25 : else if (oIter.first == MD_GDAL_OGR_COMMENT)
6206 2 : oFieldDefn.SetComment(oIter.second);
6207 23 : else if (oIter.first == MD_GDAL_OGR_DEFAULT)
6208 2 : oFieldDefn.SetDefault(oIter.second.c_str());
6209 21 : else if (oIter.first == MD_GDAL_OGR_SUBTYPE)
6210 : {
6211 5 : if (eTypeIn == eTypeOut)
6212 : {
6213 4 : const auto &osSubType = oIter.second;
6214 4 : for (auto eSubType = OFSTNone;
6215 15 : eSubType <= OFSTMaxSubType;)
6216 : {
6217 15 : if (OGRFieldDefn::GetFieldSubTypeName(eSubType) ==
6218 : osSubType)
6219 : {
6220 4 : oFieldDefn.SetSubType(eSubType);
6221 4 : break;
6222 : }
6223 11 : if (eSubType == OFSTMaxSubType)
6224 0 : break;
6225 : else
6226 11 : eSubType =
6227 11 : static_cast<OGRFieldSubType>(eSubType + 1);
6228 : }
6229 : }
6230 : }
6231 16 : else if (oIter.first == MD_GDAL_OGR_WIDTH)
6232 5 : oFieldDefn.SetWidth(atoi(oIter.second.c_str()));
6233 11 : else if (oIter.first == MD_GDAL_OGR_UNIQUE)
6234 2 : oFieldDefn.SetUnique(oIter.second == "true");
6235 9 : else if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6236 : {
6237 2 : if (poDS && poDS->GetFieldDomain(oIter.second))
6238 2 : oFieldDefn.SetDomainName(oIter.second);
6239 : }
6240 13 : else if (oIter.first == ARROW_EXTENSION_NAME_KEY &&
6241 6 : (oIter.second == EXTENSION_NAME_ARROW_JSON ||
6242 : // Used by BigQuery through ADBC driver
6243 0 : oIter.second == "google:sqlType:json"))
6244 : {
6245 6 : oFieldDefn.SetSubType(OFSTJSON);
6246 : }
6247 1 : else if (oIter.first == ARROW_EXTENSION_NAME_KEY)
6248 : {
6249 0 : CPLDebug("OGR", "Unknown Arrow extension: %s",
6250 : oIter.second.c_str());
6251 : }
6252 : else
6253 : {
6254 1 : CPLDebug("OGR", "Unknown field metadata: %s",
6255 : oIter.first.c_str());
6256 : }
6257 : }
6258 : }
6259 464 : auto poLayerDefn = GetLayerDefn();
6260 464 : const int nFieldCountBefore = poLayerDefn->GetFieldCount();
6261 928 : if (CreateField(&oFieldDefn) != OGRERR_NONE ||
6262 464 : nFieldCountBefore + 1 != poLayerDefn->GetFieldCount())
6263 : {
6264 0 : return false;
6265 : }
6266 : const char *pszActualFieldName =
6267 464 : poLayerDefn->GetFieldDefn(nFieldCountBefore)->GetNameRef();
6268 464 : if (pszActualFieldName != osWantedOGRFieldName)
6269 : {
6270 : m_poPrivate
6271 1 : ->m_oMapArrowFieldNameToOGRFieldName[osWantedOGRFieldName] =
6272 1 : pszActualFieldName;
6273 : }
6274 464 : return true;
6275 464 : };
6276 :
6277 8146 : for (const auto &sType : gasArrowTypesToOGR)
6278 : {
6279 7915 : if (strcmp(format, sType.arrowType) == 0)
6280 : {
6281 233 : return AddField(sType.eType, sType.eSubType, 0, 0);
6282 : }
6283 : }
6284 :
6285 231 : if (IsMap(format))
6286 : {
6287 70 : return AddField(OFTString, OFSTJSON, 0, 0);
6288 : }
6289 :
6290 161 : if (IsTimestamp(format))
6291 : {
6292 20 : return AddField(OFTDateTime, OFSTNone, 0, 0);
6293 : }
6294 :
6295 141 : if (IsFixedWidthBinary(format))
6296 : {
6297 3 : return AddField(OFTBinary, OFSTNone, GetFixedWithBinary(format), 0);
6298 : }
6299 :
6300 138 : if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6301 : {
6302 132 : const char *childFormat = schema->children[0]->format;
6303 1103 : for (const auto &sType : gasListTypes)
6304 : {
6305 1088 : if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
6306 : {
6307 117 : return AddField(sType.eType, sType.eSubType, 0, 0);
6308 : }
6309 : }
6310 :
6311 15 : if (IsDecimal(childFormat))
6312 : {
6313 7 : int nPrecision = 0;
6314 7 : int nScale = 0;
6315 7 : int nWidthInBytes = 0;
6316 7 : if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
6317 : nWidthInBytes))
6318 : {
6319 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6320 0 : (std::string("Invalid field format ") + format +
6321 0 : " for field " + osFieldPrefix + fieldName)
6322 : .c_str());
6323 0 : return false;
6324 : }
6325 :
6326 : const char *pszError =
6327 7 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6328 7 : if (pszError)
6329 : {
6330 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6331 0 : return false;
6332 : }
6333 :
6334 : // DBF convention: add space for negative sign and decimal separator
6335 7 : return AddField(OFTRealList, OFSTNone, nPrecision + 2, nScale);
6336 : }
6337 :
6338 8 : if (IsSupportForJSONObj(schema->children[0]))
6339 : {
6340 8 : return AddField(OFTString, OFSTJSON, 0, 0);
6341 : }
6342 :
6343 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6344 0 : ("List of type '" + std::string(childFormat) + "' for field " +
6345 0 : osFieldPrefix + fieldName + " is not supported.")
6346 : .c_str());
6347 0 : return false;
6348 : }
6349 :
6350 6 : if (IsDecimal(format))
6351 : {
6352 6 : int nPrecision = 0;
6353 6 : int nScale = 0;
6354 6 : int nWidthInBytes = 0;
6355 6 : if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6356 : {
6357 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6358 0 : (std::string("Invalid field format ") + format +
6359 0 : " for field " + osFieldPrefix + fieldName)
6360 : .c_str());
6361 0 : return false;
6362 : }
6363 :
6364 : const char *pszError =
6365 6 : GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6366 6 : if (pszError)
6367 : {
6368 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6369 0 : return false;
6370 : }
6371 :
6372 : // DBF convention: add space for negative sign and decimal separator
6373 6 : return AddField(OFTReal, OFSTNone, nPrecision + 2, nScale);
6374 : }
6375 :
6376 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6377 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
6378 0 : fieldName + " is not supported.")
6379 : .c_str());
6380 0 : return false;
6381 : }
6382 :
6383 : //! @endcond
6384 :
6385 : /** Creates a field from an ArrowSchema.
6386 : *
6387 : * This should only be used for attribute fields. Geometry fields should
6388 : * be created with CreateGeomField(). The FID field should also not be
6389 : * passed with this method.
6390 : *
6391 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6392 : * passed schema must be for an individual field, and thus, is *not* of type
6393 : * struct (format=+s) (unless writing a set of fields grouped together in the
6394 : * same structure).
6395 : *
6396 : * Additional field metadata can be speciffed through the ArrowSchema::metadata
6397 : * field with the potential following items:
6398 : * <ul>
6399 : * <li>"GDAL:OGR:alternative_name": value of
6400 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6401 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6402 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6403 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6404 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6405 : * string)</li>
6406 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6407 : * "true" or "false")</li>
6408 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6409 : * </ul>
6410 : *
6411 : * This method and CreateField() are mutually exclusive in the same session.
6412 : *
6413 : * This method is the same as the C function OGR_L_CreateFieldFromArrowSchema().
6414 : *
6415 : * @param schema Schema of the field to create.
6416 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6417 : * @return true in case of success
6418 : * @since 3.8
6419 : */
6420 453 : bool OGRLayer::CreateFieldFromArrowSchema(const struct ArrowSchema *schema,
6421 : CSLConstList papszOptions)
6422 : {
6423 906 : return CreateFieldFromArrowSchemaInternal(schema, std::string(),
6424 906 : papszOptions);
6425 : }
6426 :
6427 : /************************************************************************/
6428 : /* OGR_L_CreateFieldFromArrowSchema() */
6429 : /************************************************************************/
6430 :
6431 : /** Creates a field from an ArrowSchema.
6432 : *
6433 : * This should only be used for attribute fields. Geometry fields should
6434 : * be created with CreateGeomField(). The FID field should also not be
6435 : * passed with this method.
6436 : *
6437 : * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6438 : * passed schema must be for an individual field, and thus, is *not* of type
6439 : * struct (format=+s) (unless writing a set of fields grouped together in the
6440 : * same structure).
6441 : *
6442 : * Additional field metadata can be speciffed through the ArrowSchema::metadata
6443 : * field with the potential following items:
6444 : * <ul>
6445 : * <li>"GDAL:OGR:alternative_name": value of
6446 : * OGRFieldDefn::GetAlternativeNameRef()</li>
6447 : * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6448 : * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6449 : * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6450 : * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6451 : * string)</li>
6452 : * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6453 : * "true" or "false")</li>
6454 : * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6455 : * </ul>
6456 : *
6457 : * This method and CreateField() are mutually exclusive in the same session.
6458 : *
6459 : * This method is the same as the C++ method OGRLayer::CreateFieldFromArrowSchema().
6460 : *
6461 : * @param hLayer Layer.
6462 : * @param schema Schema of the field to create.
6463 : * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6464 : * @return true in case of success
6465 : * @since 3.8
6466 : */
6467 541 : bool OGR_L_CreateFieldFromArrowSchema(OGRLayerH hLayer,
6468 : const struct ArrowSchema *schema,
6469 : char **papszOptions)
6470 : {
6471 541 : VALIDATE_POINTER1(hLayer, __func__, false);
6472 541 : VALIDATE_POINTER1(schema, __func__, false);
6473 :
6474 1082 : return OGRLayer::FromHandle(hLayer)->CreateFieldFromArrowSchema(
6475 541 : schema, papszOptions);
6476 : }
6477 :
6478 : /************************************************************************/
6479 : /* BuildOGRFieldInfo() */
6480 : /************************************************************************/
6481 :
6482 : constexpr int FID_COLUMN_SPECIAL_OGR_FIELD_IDX = -2;
6483 :
6484 : struct FieldInfo
6485 : {
6486 : std::string osName{};
6487 : int iOGRFieldIdx = -1;
6488 : const char *format = nullptr;
6489 : OGRFieldType eNominalFieldType =
6490 : OFTMaxType; // OGR data type that would best match the Arrow type
6491 : OGRFieldType eTargetFieldType =
6492 : OFTMaxType; // actual OGR data type of the layer field
6493 : // OGR data type of the feature passed to FillFeature()
6494 : OGRFieldType eSetFeatureFieldType = OFTMaxType;
6495 : bool bIsGeomCol = false;
6496 : bool bUseDictionary = false;
6497 : bool bUseStringOptim = false;
6498 : int nWidthInBytes = 0; // only used for decimal fields
6499 : int nPrecision = 0; // only used for decimal fields
6500 : int nScale = 0; // only used for decimal fields
6501 : };
6502 :
6503 771 : static bool BuildOGRFieldInfo(
6504 : const struct ArrowSchema *schema, struct ArrowArray *array,
6505 : const OGRFeatureDefn *poFeatureDefn, const std::string &osFieldPrefix,
6506 : const CPLStringList &aosNativeTypes, bool &bFallbackTypesUsed,
6507 : std::vector<FieldInfo> &asFieldInfo, const char *pszFIDName,
6508 : const char *pszGeomFieldName, OGRLayer *poLayer,
6509 : const std::map<std::string, std::string> &oMapArrowFieldNameToOGRFieldName,
6510 : const struct ArrowSchema *&schemaFIDColumn,
6511 : struct ArrowArray *&arrayFIDColumn)
6512 : {
6513 771 : const char *fieldName = schema->name;
6514 771 : const char *format = schema->format;
6515 771 : if (IsStructure(format))
6516 : {
6517 18 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6518 39 : for (int64_t i = 0; i < array->n_children; ++i)
6519 : {
6520 30 : if (!BuildOGRFieldInfo(schema->children[i], array->children[i],
6521 : poFeatureDefn, osNewPrefix, aosNativeTypes,
6522 : bFallbackTypesUsed, asFieldInfo, pszFIDName,
6523 : pszGeomFieldName, poLayer,
6524 : oMapArrowFieldNameToOGRFieldName,
6525 : schemaFIDColumn, arrayFIDColumn))
6526 : {
6527 0 : return false;
6528 : }
6529 : }
6530 9 : return true;
6531 : }
6532 :
6533 1524 : FieldInfo sInfo;
6534 :
6535 781 : if (schema->dictionary &&
6536 19 : !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6537 : {
6538 15 : if (!IsValidDictionaryIndexType(format))
6539 : {
6540 0 : CPLError(CE_Failure, CPLE_NotSupported,
6541 : "Dictionary only supported if the parent is of "
6542 : "type [U]Int[8|16|32|64]");
6543 0 : return false;
6544 : }
6545 :
6546 15 : sInfo.bUseDictionary = true;
6547 15 : schema = schema->dictionary;
6548 15 : format = schema->format;
6549 15 : array = array->dictionary;
6550 : }
6551 :
6552 762 : sInfo.osName = osFieldPrefix + fieldName;
6553 762 : sInfo.format = format;
6554 762 : if (pszFIDName && sInfo.osName == pszFIDName)
6555 : {
6556 34 : if (IsInt32(format) || IsInt64(format))
6557 : {
6558 33 : sInfo.iOGRFieldIdx = FID_COLUMN_SPECIAL_OGR_FIELD_IDX;
6559 33 : schemaFIDColumn = schema;
6560 33 : arrayFIDColumn = array;
6561 : }
6562 : else
6563 : {
6564 1 : CPLError(CE_Failure, CPLE_AppDefined,
6565 : "FID column '%s' should be of Arrow format 'i' "
6566 : "(int32) or 'l' (int64)",
6567 : sInfo.osName.c_str());
6568 1 : return false;
6569 : }
6570 : }
6571 : else
6572 : {
6573 : const std::string &osExpectedOGRFieldName =
6574 2183 : [&oMapArrowFieldNameToOGRFieldName, &sInfo]() -> const std::string &
6575 : {
6576 : const auto oIter =
6577 728 : oMapArrowFieldNameToOGRFieldName.find(sInfo.osName);
6578 728 : if (oIter != oMapArrowFieldNameToOGRFieldName.end())
6579 1 : return oIter->second;
6580 727 : return sInfo.osName;
6581 728 : }();
6582 728 : sInfo.iOGRFieldIdx =
6583 728 : poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str());
6584 728 : if (sInfo.iOGRFieldIdx >= 0)
6585 : {
6586 652 : bool bTypeOK = false;
6587 : const auto eOGRType =
6588 652 : poFeatureDefn->GetFieldDefn(sInfo.iOGRFieldIdx)->GetType();
6589 652 : sInfo.eTargetFieldType = eOGRType;
6590 11668 : for (const auto &sType : gasArrowTypesToOGR)
6591 : {
6592 11325 : if (strcmp(format, sType.arrowType) == 0)
6593 : {
6594 309 : sInfo.bUseStringOptim = sType.eType == OFTString;
6595 309 : sInfo.eNominalFieldType = sType.eType;
6596 309 : if (eOGRType == sInfo.eNominalFieldType)
6597 : {
6598 279 : bTypeOK = true;
6599 279 : break;
6600 : }
6601 30 : else if (eOGRType == OFTString)
6602 : {
6603 4 : bFallbackTypesUsed = true;
6604 4 : bTypeOK = true;
6605 4 : break;
6606 : }
6607 26 : else if (eOGRType == OFTInteger &&
6608 10 : sType.eType == OFTInteger64)
6609 : {
6610 : // Potentially lossy.
6611 4 : CPLDebug("OGR",
6612 : "For field %s, writing from Arrow array of "
6613 : "type Int64 into OGR Int32 field. "
6614 : "Potentially loss conversion can happen",
6615 : sInfo.osName.c_str());
6616 4 : bFallbackTypesUsed = true;
6617 4 : bTypeOK = true;
6618 4 : break;
6619 : }
6620 22 : else if (eOGRType == OFTInteger && sType.eType == OFTReal)
6621 : {
6622 : // Potentially lossy.
6623 6 : CPLDebug("OGR",
6624 : "For field %s, writing from Arrow array of "
6625 : "type Real into OGR Int32 field. "
6626 : "Potentially loss conversion can happen",
6627 : sInfo.osName.c_str());
6628 6 : bFallbackTypesUsed = true;
6629 6 : bTypeOK = true;
6630 6 : break;
6631 : }
6632 16 : else if (eOGRType == OFTInteger64 && sType.eType == OFTReal)
6633 : {
6634 : // Potentially lossy.
6635 6 : CPLDebug("OGR",
6636 : "For field %s, writing from Arrow array of "
6637 : "type Real into OGR Int64 field. "
6638 : "Potentially loss conversion can happen",
6639 : sInfo.osName.c_str());
6640 6 : bFallbackTypesUsed = true;
6641 6 : bTypeOK = true;
6642 6 : break;
6643 : }
6644 10 : else if (eOGRType == OFTReal && sType.eType == OFTInteger64)
6645 : {
6646 : // Potentially lossy.
6647 4 : CPLDebug("OGR",
6648 : "For field %s, writing from Arrow array of "
6649 : "type Int64 into OGR Real field. "
6650 : "Potentially loss conversion can happen",
6651 : sInfo.osName.c_str());
6652 4 : bFallbackTypesUsed = true;
6653 4 : bTypeOK = true;
6654 4 : break;
6655 : }
6656 6 : else if ((eOGRType == OFTInteger64 ||
6657 4 : eOGRType == OFTReal) &&
6658 4 : sType.eType == OFTInteger)
6659 : {
6660 : // Non-lossy
6661 4 : bFallbackTypesUsed = true;
6662 4 : bTypeOK = true;
6663 4 : break;
6664 : }
6665 2 : else if (eOGRType == OFTDateTime &&
6666 2 : sType.eType == OFTString)
6667 : {
6668 2 : bFallbackTypesUsed = true;
6669 2 : bTypeOK = true;
6670 2 : break;
6671 : }
6672 : else
6673 : {
6674 0 : CPLError(CE_Failure, CPLE_AppDefined,
6675 : "For field %s, OGR field type is %s whereas "
6676 : "Arrow type implies %s",
6677 : sInfo.osName.c_str(),
6678 : OGR_GetFieldTypeName(eOGRType),
6679 0 : OGR_GetFieldTypeName(sType.eType));
6680 0 : return false;
6681 : }
6682 : }
6683 : }
6684 :
6685 652 : if (!bTypeOK && IsMap(format))
6686 : {
6687 106 : sInfo.eNominalFieldType = OFTString;
6688 106 : if (eOGRType == sInfo.eNominalFieldType)
6689 : {
6690 106 : bTypeOK = true;
6691 : }
6692 : else
6693 : {
6694 0 : CPLError(CE_Failure, CPLE_AppDefined,
6695 : "For field %s, OGR field type is %s whereas "
6696 : "Arrow type implies %s",
6697 : sInfo.osName.c_str(),
6698 : OGR_GetFieldTypeName(eOGRType),
6699 : OGR_GetFieldTypeName(OFTString));
6700 0 : return false;
6701 : }
6702 : }
6703 :
6704 652 : if (!bTypeOK && IsTimestamp(format))
6705 : {
6706 32 : sInfo.eNominalFieldType = OFTDateTime;
6707 32 : if (eOGRType == sInfo.eNominalFieldType)
6708 : {
6709 31 : bTypeOK = true;
6710 : }
6711 1 : else if (eOGRType == OFTString)
6712 : {
6713 1 : bFallbackTypesUsed = true;
6714 1 : bTypeOK = true;
6715 : }
6716 : else
6717 : {
6718 0 : CPLError(CE_Failure, CPLE_AppDefined,
6719 : "For field %s, OGR field type is %s whereas "
6720 : "Arrow type implies %s",
6721 : sInfo.osName.c_str(),
6722 : OGR_GetFieldTypeName(eOGRType),
6723 : OGR_GetFieldTypeName(OFTDateTime));
6724 0 : return false;
6725 : }
6726 : }
6727 :
6728 652 : if (!bTypeOK && IsFixedWidthBinary(format))
6729 : {
6730 5 : sInfo.eNominalFieldType = OFTBinary;
6731 5 : if (eOGRType == sInfo.eNominalFieldType)
6732 : {
6733 5 : bTypeOK = true;
6734 : }
6735 0 : else if (eOGRType == OFTString)
6736 : {
6737 0 : bFallbackTypesUsed = true;
6738 0 : bTypeOK = true;
6739 : }
6740 : else
6741 : {
6742 0 : CPLError(CE_Failure, CPLE_AppDefined,
6743 : "For field %s, OGR field type is %s whereas "
6744 : "Arrow type implies %s",
6745 : sInfo.osName.c_str(),
6746 : OGR_GetFieldTypeName(eOGRType),
6747 : OGR_GetFieldTypeName(OFTBinary));
6748 0 : return false;
6749 : }
6750 : }
6751 :
6752 725 : if (!bTypeOK && (IsList(format) || IsLargeList(format) ||
6753 73 : IsFixedSizeList(format)))
6754 : {
6755 190 : const char *childFormat = schema->children[0]->format;
6756 1565 : for (const auto &sType : gasListTypes)
6757 : {
6758 1544 : if (childFormat[0] == sType.arrowLetter &&
6759 169 : childFormat[1] == 0)
6760 : {
6761 169 : sInfo.eNominalFieldType = sType.eType;
6762 169 : if (eOGRType == sInfo.eNominalFieldType)
6763 : {
6764 154 : bTypeOK = true;
6765 154 : break;
6766 : }
6767 15 : else if (eOGRType == OFTString)
6768 : {
6769 15 : bFallbackTypesUsed = true;
6770 15 : bTypeOK = true;
6771 15 : break;
6772 : }
6773 : else
6774 : {
6775 0 : CPLError(CE_Failure, CPLE_AppDefined,
6776 : "For field %s, OGR field type is %s "
6777 : "whereas "
6778 : "Arrow type implies %s",
6779 : sInfo.osName.c_str(),
6780 : OGR_GetFieldTypeName(eOGRType),
6781 0 : OGR_GetFieldTypeName(sType.eType));
6782 0 : return false;
6783 : }
6784 : }
6785 : }
6786 :
6787 190 : if (!bTypeOK && IsDecimal(childFormat))
6788 : {
6789 11 : if (!ParseDecimalFormat(childFormat, sInfo.nPrecision,
6790 : sInfo.nScale, sInfo.nWidthInBytes))
6791 : {
6792 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6793 0 : (std::string("Invalid field format ") +
6794 0 : childFormat + " for field " + osFieldPrefix +
6795 : fieldName)
6796 : .c_str());
6797 0 : return false;
6798 : }
6799 :
6800 11 : const char *pszError = GetErrorIfUnsupportedDecimal(
6801 : sInfo.nWidthInBytes, sInfo.nPrecision);
6802 11 : if (pszError)
6803 : {
6804 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6805 0 : return false;
6806 : }
6807 :
6808 11 : sInfo.eNominalFieldType = OFTRealList;
6809 11 : if (eOGRType == sInfo.eNominalFieldType)
6810 : {
6811 11 : bTypeOK = true;
6812 : }
6813 0 : else if (eOGRType == OFTString)
6814 : {
6815 0 : bFallbackTypesUsed = true;
6816 0 : bTypeOK = true;
6817 : }
6818 : else
6819 : {
6820 0 : CPLError(CE_Failure, CPLE_AppDefined,
6821 : "For field %s, OGR field type is %s whereas "
6822 : "Arrow type implies %s",
6823 : sInfo.osName.c_str(),
6824 : OGR_GetFieldTypeName(eOGRType),
6825 : OGR_GetFieldTypeName(OFTRealList));
6826 0 : return false;
6827 : }
6828 : }
6829 :
6830 190 : if (!bTypeOK && IsSupportForJSONObj(schema->children[0]))
6831 : {
6832 10 : sInfo.eNominalFieldType = OFTString;
6833 10 : if (eOGRType == sInfo.eNominalFieldType)
6834 : {
6835 10 : bTypeOK = true;
6836 : }
6837 : else
6838 : {
6839 0 : CPLError(CE_Failure, CPLE_AppDefined,
6840 : "For field %s, OGR field type is %s whereas "
6841 : "Arrow type implies %s",
6842 : sInfo.osName.c_str(),
6843 : OGR_GetFieldTypeName(eOGRType),
6844 : OGR_GetFieldTypeName(OFTString));
6845 0 : return false;
6846 : }
6847 : }
6848 :
6849 190 : if (!bTypeOK)
6850 : {
6851 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6852 0 : ("List of type '" + std::string(childFormat) +
6853 0 : "' for field " + osFieldPrefix + fieldName +
6854 : " is not supported.")
6855 : .c_str());
6856 0 : return false;
6857 : }
6858 : }
6859 :
6860 652 : if (!bTypeOK && IsDecimal(format))
6861 : {
6862 10 : if (!ParseDecimalFormat(format, sInfo.nPrecision, sInfo.nScale,
6863 : sInfo.nWidthInBytes))
6864 : {
6865 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
6866 0 : (std::string("Invalid field format ") + format +
6867 0 : " for field " + osFieldPrefix + fieldName)
6868 : .c_str());
6869 0 : return false;
6870 : }
6871 :
6872 10 : const char *pszError = GetErrorIfUnsupportedDecimal(
6873 : sInfo.nWidthInBytes, sInfo.nPrecision);
6874 10 : if (pszError)
6875 : {
6876 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6877 0 : return false;
6878 : }
6879 :
6880 10 : sInfo.eNominalFieldType = OFTReal;
6881 10 : if (eOGRType == sInfo.eNominalFieldType)
6882 : {
6883 10 : bTypeOK = true;
6884 : }
6885 0 : else if (eOGRType == OFTString)
6886 : {
6887 0 : bFallbackTypesUsed = true;
6888 0 : bTypeOK = true;
6889 : }
6890 : else
6891 : {
6892 0 : CPLError(CE_Failure, CPLE_AppDefined,
6893 : "For field %s, OGR field type is %s whereas "
6894 : "Arrow type implies %s",
6895 : sInfo.osName.c_str(),
6896 : OGR_GetFieldTypeName(eOGRType),
6897 : OGR_GetFieldTypeName(OFTReal));
6898 0 : return false;
6899 : }
6900 : }
6901 :
6902 652 : if (!bTypeOK)
6903 : {
6904 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
6905 0 : ("Type '" + std::string(format) + "' for field " +
6906 0 : osFieldPrefix + fieldName + " is not supported.")
6907 : .c_str());
6908 0 : return false;
6909 : }
6910 : }
6911 : else
6912 : {
6913 76 : sInfo.iOGRFieldIdx = poFeatureDefn->GetGeomFieldIndex(
6914 76 : osExpectedOGRFieldName.c_str());
6915 76 : if (sInfo.iOGRFieldIdx < 0)
6916 : {
6917 51 : if (pszGeomFieldName && pszGeomFieldName == sInfo.osName)
6918 : {
6919 46 : if (poFeatureDefn->GetGeomFieldCount() == 0)
6920 : {
6921 0 : CPLError(CE_Failure, CPLE_AppDefined,
6922 : "Cannot find OGR geometry field for Arrow "
6923 : "array %s",
6924 : sInfo.osName.c_str());
6925 0 : return false;
6926 : }
6927 46 : sInfo.iOGRFieldIdx = 0;
6928 : }
6929 : else
6930 : {
6931 : // Check if ARROW:extension:name = ogc.wkb or geoarrow.wkb
6932 5 : const char *pabyMetadata = schema->metadata;
6933 5 : if (pabyMetadata)
6934 : {
6935 : const auto oMetadata =
6936 5 : OGRParseArrowMetadata(pabyMetadata);
6937 5 : auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
6938 10 : if (oIter != oMetadata.end() &&
6939 5 : (oIter->second == EXTENSION_NAME_OGC_WKB ||
6940 0 : oIter->second == EXTENSION_NAME_GEOARROW_WKB))
6941 : {
6942 5 : if (poFeatureDefn->GetGeomFieldCount() == 0)
6943 : {
6944 0 : CPLError(CE_Failure, CPLE_AppDefined,
6945 : "Cannot find OGR geometry field "
6946 : "for Arrow array %s",
6947 : sInfo.osName.c_str());
6948 0 : return false;
6949 : }
6950 5 : sInfo.iOGRFieldIdx = 0;
6951 : }
6952 : }
6953 : }
6954 :
6955 51 : if (sInfo.iOGRFieldIdx < 0)
6956 : {
6957 0 : CPLError(CE_Failure, CPLE_AppDefined,
6958 : "Cannot find OGR field for Arrow array %s",
6959 : sInfo.osName.c_str());
6960 0 : return false;
6961 : }
6962 : }
6963 :
6964 76 : if (!IsBinary(format) && !IsLargeBinary(format))
6965 : {
6966 0 : CPLError(CE_Failure, CPLE_AppDefined,
6967 : "Geometry column '%s' should be of Arrow format "
6968 : "'z' (binary) or 'Z' (large binary)",
6969 : sInfo.osName.c_str());
6970 0 : return false;
6971 : }
6972 76 : sInfo.bIsGeomCol = true;
6973 : }
6974 : }
6975 :
6976 761 : asFieldInfo.emplace_back(std::move(sInfo));
6977 761 : return true;
6978 : }
6979 :
6980 : /************************************************************************/
6981 : /* GetUInt64Value() */
6982 : /************************************************************************/
6983 :
6984 90 : static inline uint64_t GetUInt64Value(const struct ArrowSchema *schema,
6985 : const struct ArrowArray *array,
6986 : size_t iFeature)
6987 : {
6988 90 : uint64_t nVal = 0;
6989 90 : CPLAssert(schema->format[1] == 0);
6990 90 : switch (schema->format[0])
6991 : {
6992 8 : case ARROW_LETTER_INT8:
6993 8 : nVal = GetValue<int8_t>(array, iFeature);
6994 8 : break;
6995 8 : case ARROW_LETTER_UINT8:
6996 8 : nVal = GetValue<uint8_t>(array, iFeature);
6997 8 : break;
6998 8 : case ARROW_LETTER_INT16:
6999 8 : nVal = GetValue<int16_t>(array, iFeature);
7000 8 : break;
7001 8 : case ARROW_LETTER_UINT16:
7002 8 : nVal = GetValue<uint16_t>(array, iFeature);
7003 8 : break;
7004 34 : case ARROW_LETTER_INT32:
7005 34 : nVal = GetValue<int32_t>(array, iFeature);
7006 34 : break;
7007 8 : case ARROW_LETTER_UINT32:
7008 8 : nVal = GetValue<uint32_t>(array, iFeature);
7009 8 : break;
7010 8 : case ARROW_LETTER_INT64:
7011 8 : nVal = GetValue<int64_t>(array, iFeature);
7012 8 : break;
7013 8 : case ARROW_LETTER_UINT64:
7014 8 : nVal = GetValue<uint64_t>(array, iFeature);
7015 8 : break;
7016 0 : default:
7017 : // Shouldn't happen given checks in BuildOGRFieldInfo()
7018 0 : CPLAssert(false);
7019 : break;
7020 : }
7021 90 : return nVal;
7022 : }
7023 :
7024 : /************************************************************************/
7025 : /* GetWorkingBufferSize() */
7026 : /************************************************************************/
7027 :
7028 1383840 : static size_t GetWorkingBufferSize(const struct ArrowSchema *schema,
7029 : const struct ArrowArray *array,
7030 : size_t iFeature, int &iArrowIdxInOut,
7031 : const std::vector<FieldInfo> &asFieldInfo)
7032 : {
7033 1383840 : const char *fieldName = schema->name;
7034 1383840 : const char *format = schema->format;
7035 1383840 : if (IsStructure(format))
7036 : {
7037 60677 : size_t nRet = 0;
7038 1383860 : for (int64_t i = 0; i < array->n_children; ++i)
7039 : {
7040 1323190 : nRet += GetWorkingBufferSize(
7041 1323190 : schema->children[i], array->children[i],
7042 1323190 : iFeature + static_cast<size_t>(array->offset), iArrowIdxInOut,
7043 : asFieldInfo);
7044 : }
7045 60677 : return nRet;
7046 : }
7047 1323170 : const int iArrowIdx = iArrowIdxInOut;
7048 1323170 : ++iArrowIdxInOut;
7049 :
7050 1323170 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7051 122967 : return 0;
7052 :
7053 1200200 : const uint8_t *pabyValidity =
7054 1200200 : static_cast<const uint8_t *>(array->buffers[0]);
7055 1200320 : if (array->null_count != 0 && pabyValidity &&
7056 119 : !TestBit(pabyValidity, static_cast<size_t>(iFeature + array->offset)))
7057 : {
7058 : // empty string
7059 56 : return 0;
7060 : }
7061 :
7062 1200140 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7063 : {
7064 41 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7065 41 : const auto dictArray = array->dictionary;
7066 41 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7067 : {
7068 1 : CPLError(CE_Failure, CPLE_AppDefined,
7069 : "Feature %" PRIu64
7070 : ", field %s: invalid dictionary index: %" PRIu64,
7071 : static_cast<uint64_t>(iFeature), fieldName, nDictIdx);
7072 1 : return 0;
7073 : }
7074 :
7075 40 : array = dictArray;
7076 40 : schema = schema->dictionary;
7077 40 : format = schema->format;
7078 40 : iFeature = static_cast<size_t>(nDictIdx);
7079 : }
7080 :
7081 1200140 : if (IsString(format))
7082 : {
7083 1200130 : const auto *panOffsets =
7084 1200130 : static_cast<const uint32_t *>(array->buffers[1]) + array->offset;
7085 1200130 : return 1 + (panOffsets[iFeature + 1] - panOffsets[iFeature]);
7086 : }
7087 10 : else if (IsLargeString(format))
7088 : {
7089 10 : const auto *panOffsets =
7090 10 : static_cast<const uint64_t *>(array->buffers[1]) + array->offset;
7091 10 : return 1 + static_cast<size_t>(panOffsets[iFeature + 1] -
7092 10 : panOffsets[iFeature]);
7093 : }
7094 0 : return 0;
7095 : }
7096 :
7097 : /************************************************************************/
7098 : /* FillField() */
7099 : /************************************************************************/
7100 :
7101 : template <typename ArrowType, typename OGRType = ArrowType>
7102 729 : inline static void FillField(const struct ArrowArray *array, int iOGRFieldIdx,
7103 : size_t iFeature, OGRFeature &oFeature)
7104 : {
7105 729 : const auto *panValues = static_cast<const ArrowType *>(array->buffers[1]);
7106 729 : oFeature.SetFieldSameTypeUnsafe(
7107 : iOGRFieldIdx,
7108 729 : static_cast<OGRType>(panValues[iFeature + array->offset]));
7109 729 : }
7110 :
7111 : /************************************************************************/
7112 : /* FillFieldString() */
7113 : /************************************************************************/
7114 :
7115 : template <typename OffsetType>
7116 : inline static void
7117 1200140 : FillFieldString(const struct ArrowArray *array, int iOGRFieldIdx,
7118 : size_t iFeature, int iArrowIdx,
7119 : const std::vector<FieldInfo> &asFieldInfo,
7120 : std::string &osWorkingBuffer, OGRFeature &oFeature)
7121 : {
7122 1200140 : const auto *panOffsets =
7123 1200140 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7124 1200140 : const char *pszStr = static_cast<const char *>(array->buffers[2]);
7125 1200140 : const size_t nLen =
7126 1200140 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7127 1200140 : if (asFieldInfo[iArrowIdx].bUseStringOptim)
7128 : {
7129 1200140 : oFeature.SetFieldSameTypeUnsafe(
7130 1200140 : iOGRFieldIdx, &osWorkingBuffer[0] + osWorkingBuffer.size());
7131 1200140 : osWorkingBuffer.append(pszStr + panOffsets[iFeature], nLen);
7132 1200140 : osWorkingBuffer.push_back(0); // append null character
7133 : }
7134 : else
7135 : {
7136 0 : const std::string osTmp(pszStr, nLen);
7137 0 : oFeature.SetField(iOGRFieldIdx, osTmp.c_str());
7138 : }
7139 1200140 : }
7140 :
7141 : /************************************************************************/
7142 : /* FillFieldBinary() */
7143 : /************************************************************************/
7144 :
7145 : template <typename OffsetType>
7146 : inline static bool
7147 60605 : FillFieldBinary(const struct ArrowArray *array, int iOGRFieldIdx,
7148 : size_t iFeature, int iArrowIdx,
7149 : const std::vector<FieldInfo> &asFieldInfo,
7150 : const std::string &osFieldPrefix, const char *pszFieldName,
7151 : OGRFeature &oFeature)
7152 : {
7153 60605 : const auto *panOffsets =
7154 60605 : static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7155 60605 : const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]) +
7156 60605 : static_cast<size_t>(panOffsets[iFeature]);
7157 60605 : const size_t nLen =
7158 60605 : static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7159 60605 : if (asFieldInfo[iArrowIdx].bIsGeomCol)
7160 : {
7161 60577 : size_t nBytesConsumedOut = 0;
7162 :
7163 : // Check if we can reuse the existing geometry, to save dynamic memory
7164 : // allocations.
7165 60577 : if (nLen >= 5 && pabyData[0] == wkbNDR && pabyData[1] <= wkbTriangle &&
7166 60570 : pabyData[2] == 0 && pabyData[3] == 0 && pabyData[4] == 0)
7167 : {
7168 60570 : const auto poExistingGeom = oFeature.GetGeomFieldRef(iOGRFieldIdx);
7169 121093 : if (poExistingGeom &&
7170 60523 : poExistingGeom->getGeometryType() == pabyData[1])
7171 : {
7172 60523 : poExistingGeom->importFromWkb(pabyData, nLen, wkbVariantIso,
7173 : nBytesConsumedOut);
7174 60523 : return true;
7175 : }
7176 : }
7177 :
7178 54 : OGRGeometry *poGeometry = nullptr;
7179 54 : OGRGeometryFactory::createFromWkb(pabyData, nullptr, &poGeometry, nLen,
7180 : wkbVariantIso, nBytesConsumedOut);
7181 54 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, poGeometry);
7182 : }
7183 : else
7184 : {
7185 28 : if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
7186 : {
7187 0 : CPLError(CE_Failure, CPLE_NotSupported,
7188 : "Content for field %s%s is too large",
7189 : osFieldPrefix.c_str(), pszFieldName);
7190 0 : return false;
7191 : }
7192 28 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(nLen), pabyData);
7193 : }
7194 82 : return true;
7195 : }
7196 :
7197 : /************************************************************************/
7198 : /* FillFeature() */
7199 : /************************************************************************/
7200 :
7201 1323190 : static bool FillFeature(OGRLayer *poLayer, const struct ArrowSchema *schema,
7202 : const struct ArrowArray *array,
7203 : const std::string &osFieldPrefix, size_t iFeature,
7204 : int &iArrowIdxInOut,
7205 : const std::vector<FieldInfo> &asFieldInfo,
7206 : OGRFeature &oFeature, std::string &osWorkingBuffer)
7207 :
7208 : {
7209 1323190 : const char *fieldName = schema->name;
7210 1323190 : const char *format = schema->format;
7211 1323190 : if (IsStructure(format))
7212 : {
7213 38 : const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
7214 78 : for (int64_t i = 0; i < array->n_children; ++i)
7215 : {
7216 59 : if (!FillFeature(
7217 59 : poLayer, schema->children[i], array->children[i],
7218 59 : osNewPrefix, iFeature + static_cast<size_t>(array->offset),
7219 : iArrowIdxInOut, asFieldInfo, oFeature, osWorkingBuffer))
7220 0 : return false;
7221 : }
7222 19 : return true;
7223 : }
7224 1323170 : const int iArrowIdx = iArrowIdxInOut;
7225 1323170 : ++iArrowIdxInOut;
7226 1323170 : const int iOGRFieldIdx = asFieldInfo[iArrowIdx].iOGRFieldIdx;
7227 :
7228 1323170 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7229 : {
7230 62 : format = schema->dictionary->format;
7231 : }
7232 :
7233 1323170 : if (array->null_count != 0)
7234 : {
7235 997 : const uint8_t *pabyValidity =
7236 997 : static_cast<const uint8_t *>(array->buffers[0]);
7237 1945 : if (pabyValidity &&
7238 948 : !TestBit(pabyValidity,
7239 948 : static_cast<size_t>(iFeature + array->offset)))
7240 : {
7241 287 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7242 5 : oFeature.SetFID(OGRNullFID);
7243 282 : else if (asFieldInfo[iArrowIdx].bIsGeomCol)
7244 60 : oFeature.SetGeomFieldDirectly(iOGRFieldIdx, nullptr);
7245 222 : else if (asFieldInfo[iArrowIdx].eSetFeatureFieldType == OFTString)
7246 : {
7247 119 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7248 119 : if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7249 : {
7250 63 : if (IsValidField(psField))
7251 : {
7252 51 : CPLFree(psField->String);
7253 51 : OGR_RawField_SetNull(psField);
7254 : }
7255 : }
7256 : else
7257 : {
7258 56 : OGR_RawField_SetNull(psField);
7259 : }
7260 : }
7261 : else
7262 : {
7263 103 : OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7264 103 : switch (asFieldInfo[iArrowIdx].eSetFeatureFieldType)
7265 : {
7266 47 : case OFTRealList:
7267 : case OFTIntegerList:
7268 : case OFTInteger64List:
7269 47 : if (IsValidField(psField))
7270 47 : CPLFree(psField->IntegerList.paList);
7271 47 : break;
7272 :
7273 7 : case OFTStringList:
7274 7 : if (IsValidField(psField))
7275 7 : CSLDestroy(psField->StringList.paList);
7276 7 : break;
7277 :
7278 1 : case OFTBinary:
7279 1 : if (IsValidField(psField))
7280 1 : CPLFree(psField->Binary.paData);
7281 1 : break;
7282 :
7283 48 : default:
7284 48 : break;
7285 : }
7286 103 : OGR_RawField_SetNull(psField);
7287 : }
7288 287 : return true;
7289 : }
7290 : }
7291 :
7292 1322880 : if (asFieldInfo[iArrowIdx].bUseDictionary)
7293 : {
7294 49 : const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7295 49 : auto dictArray = array->dictionary;
7296 49 : if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7297 : {
7298 2 : CPLError(CE_Failure, CPLE_AppDefined,
7299 : "Feature %" PRIu64
7300 : ", field %s: invalid dictionary index: %" PRIu64,
7301 : static_cast<uint64_t>(iFeature),
7302 4 : (osFieldPrefix + fieldName).c_str(), nDictIdx);
7303 2 : return false;
7304 : }
7305 47 : array = dictArray;
7306 47 : schema = schema->dictionary;
7307 47 : iFeature = static_cast<size_t>(nDictIdx);
7308 : }
7309 :
7310 1322880 : if (IsBoolean(format))
7311 : {
7312 12 : const uint8_t *pabyValues =
7313 12 : static_cast<const uint8_t *>(array->buffers[1]);
7314 12 : oFeature.SetFieldSameTypeUnsafe(
7315 : iOGRFieldIdx,
7316 12 : TestBit(pabyValues, static_cast<size_t>(iFeature + array->offset))
7317 : ? 1
7318 : : 0);
7319 12 : return true;
7320 : }
7321 1322870 : else if (IsInt8(format))
7322 : {
7323 10 : FillField<int8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7324 10 : return true;
7325 : }
7326 1322860 : else if (IsUInt8(format))
7327 : {
7328 10 : FillField<uint8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7329 10 : return true;
7330 : }
7331 1322850 : else if (IsInt16(format))
7332 : {
7333 12 : FillField<int16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7334 12 : return true;
7335 : }
7336 1322830 : else if (IsUInt16(format))
7337 : {
7338 10 : FillField<uint16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7339 10 : return true;
7340 : }
7341 1322820 : else if (IsInt32(format))
7342 : {
7343 527 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7344 : {
7345 2 : const auto *panValues =
7346 2 : static_cast<const int32_t *>(array->buffers[1]);
7347 2 : oFeature.SetFID(panValues[iFeature + array->offset]);
7348 : }
7349 : else
7350 : {
7351 525 : FillField<int32_t>(array, iOGRFieldIdx, iFeature, oFeature);
7352 : }
7353 527 : return true;
7354 : }
7355 1322300 : else if (IsUInt32(format))
7356 : {
7357 4 : FillField<uint32_t, GIntBig>(array, iOGRFieldIdx, iFeature, oFeature);
7358 4 : return true;
7359 : }
7360 1322290 : else if (IsInt64(format))
7361 : {
7362 60643 : if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7363 : {
7364 60565 : const auto *panValues =
7365 60565 : static_cast<const int64_t *>(array->buffers[1]);
7366 60565 : oFeature.SetFID(panValues[iFeature + array->offset]);
7367 : }
7368 : else
7369 : {
7370 78 : FillField<int64_t, GIntBig>(array, iOGRFieldIdx, iFeature,
7371 : oFeature);
7372 : }
7373 60643 : return true;
7374 : }
7375 1261650 : else if (IsUInt64(format))
7376 : {
7377 10 : FillField<uint64_t, double>(array, iOGRFieldIdx, iFeature, oFeature);
7378 10 : return true;
7379 : }
7380 1261640 : else if (IsFloat32(format))
7381 : {
7382 12 : FillField<float, double>(array, iOGRFieldIdx, iFeature, oFeature);
7383 12 : return true;
7384 : }
7385 1261630 : else if (IsFloat64(format))
7386 : {
7387 58 : FillField<double>(array, iOGRFieldIdx, iFeature, oFeature);
7388 58 : return true;
7389 : }
7390 1261570 : else if (IsString(format))
7391 : {
7392 1200130 : FillFieldString<uint32_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7393 : asFieldInfo, osWorkingBuffer, oFeature);
7394 1200130 : return true;
7395 : }
7396 61437 : else if (IsLargeString(format))
7397 : {
7398 10 : FillFieldString<uint64_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7399 : asFieldInfo, osWorkingBuffer, oFeature);
7400 10 : return true;
7401 : }
7402 61427 : else if (IsBinary(format))
7403 : {
7404 60589 : return FillFieldBinary<uint32_t>(array, iOGRFieldIdx, iFeature,
7405 : iArrowIdx, asFieldInfo, osFieldPrefix,
7406 60589 : fieldName, oFeature);
7407 : }
7408 838 : else if (IsLargeBinary(format))
7409 : {
7410 16 : return FillFieldBinary<uint64_t>(array, iOGRFieldIdx, iFeature,
7411 : iArrowIdx, asFieldInfo, osFieldPrefix,
7412 16 : fieldName, oFeature);
7413 : }
7414 822 : else if (asFieldInfo[iArrowIdx].nPrecision > 0)
7415 : {
7416 : // fits on a int64
7417 46 : CPLAssert(asFieldInfo[iArrowIdx].nPrecision <= 19);
7418 : // either 128 or 256 bits
7419 46 : CPLAssert((asFieldInfo[iArrowIdx].nWidthInBytes % 8) == 0);
7420 46 : const int nWidthIn64BitWord = asFieldInfo[iArrowIdx].nWidthInBytes / 8;
7421 :
7422 46 : if (IsList(format))
7423 : {
7424 16 : const auto panOffsets =
7425 16 : static_cast<const uint32_t *>(array->buffers[1]) +
7426 16 : array->offset;
7427 16 : const auto childArray = array->children[0];
7428 16 : std::vector<double> aValues;
7429 33 : for (auto i = panOffsets[iFeature]; i < panOffsets[iFeature + 1];
7430 : ++i)
7431 : {
7432 17 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7433 17 : asFieldInfo[iArrowIdx].nScale,
7434 : i));
7435 : }
7436 16 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7437 16 : aValues.data());
7438 16 : return true;
7439 : }
7440 30 : else if (IsLargeList(format))
7441 : {
7442 4 : const auto panOffsets =
7443 4 : static_cast<const uint64_t *>(array->buffers[1]) +
7444 4 : array->offset;
7445 4 : const auto childArray = array->children[0];
7446 4 : std::vector<double> aValues;
7447 4 : for (auto i = static_cast<size_t>(panOffsets[iFeature]);
7448 9 : i < static_cast<size_t>(panOffsets[iFeature + 1]); ++i)
7449 : {
7450 5 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7451 5 : asFieldInfo[iArrowIdx].nScale,
7452 : i));
7453 : }
7454 4 : oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7455 4 : aValues.data());
7456 4 : return true;
7457 : }
7458 26 : else if (IsFixedSizeList(format))
7459 : {
7460 4 : const int nVals = GetFixedSizeList(format);
7461 4 : const auto childArray = array->children[0];
7462 4 : std::vector<double> aValues;
7463 12 : for (int i = 0; i < nVals; ++i)
7464 : {
7465 8 : aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7466 8 : asFieldInfo[iArrowIdx].nScale,
7467 8 : iFeature * nVals + i));
7468 : }
7469 4 : oFeature.SetField(iOGRFieldIdx, nVals, aValues.data());
7470 4 : return true;
7471 : }
7472 :
7473 22 : CPLAssert(format[0] == ARROW_LETTER_DECIMAL);
7474 :
7475 22 : oFeature.SetFieldSameTypeUnsafe(
7476 : iOGRFieldIdx,
7477 : GetValueDecimal(array, nWidthIn64BitWord,
7478 22 : asFieldInfo[iArrowIdx].nScale, iFeature));
7479 22 : return true;
7480 : }
7481 776 : else if (SetFieldForOtherFormats(
7482 : oFeature, iOGRFieldIdx,
7483 776 : static_cast<size_t>(iFeature + array->offset), schema, array))
7484 : {
7485 776 : return true;
7486 : }
7487 :
7488 0 : CPLError(CE_Failure, CPLE_NotSupported, "%s",
7489 0 : ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
7490 0 : fieldName + " is not supported.")
7491 : .c_str());
7492 0 : return false;
7493 : }
7494 :
7495 : /************************************************************************/
7496 : /* OGRLayer::WriteArrowBatch() */
7497 : /************************************************************************/
7498 :
7499 : // clang-format off
7500 : /** Writes a batch of rows from an ArrowArray.
7501 : *
7502 : * This is semantically close to calling CreateFeature() with multiple features
7503 : * at once.
7504 : *
7505 : * The ArrowArray must be of type struct (format=+s), and its children generally
7506 : * map to a OGR attribute or geometry field (unless they are struct themselves).
7507 : *
7508 : * Method IsArrowSchemaSupported() can be called to determine if the schema
7509 : * will be supported by WriteArrowBatch().
7510 : *
7511 : * OGR fields for the corresponding children arrays must exist and be of a
7512 : * compatible type. For attribute fields, they should generally be created with
7513 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
7514 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
7515 : * they should be created either implicitly at CreateLayer() time
7516 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
7517 : *
7518 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
7519 : * implementation of WriteArrowBatch() for scenarios that involve appending to
7520 : * an already existing output layer when the input Arrow field type and the
7521 : * OGR layer field type are 32/64-bit integers or real numbers, but do not match
7522 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
7523 : * can be used to control the behavior in case of lossy conversion.
7524 : *
7525 : * Arrays for geometry columns should be of binary or large binary type and
7526 : * contain WKB geometry.
7527 : *
7528 : * Note that the passed array may be set to a released state
7529 : * (array->release==NULL) after this call (not by the base implementation,
7530 : * but in specialized ones such as Parquet or Arrow for example)
7531 : *
7532 : * Supported options of the base implementation are:
7533 : * <ul>
7534 : * <li>FID=name. Name of the FID column in the array. If not provided,
7535 : * GetFIDColumn() is used to determine it. The special name
7536 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
7537 : * GetFIDColumn() are set.
7538 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
7539 : * On input, values of the FID column are used to create the feature.
7540 : * On output, the values of the FID column may be set with the FID of the
7541 : * created feature (if the array is not released).
7542 : * </li>
7543 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
7544 : * input FID is not preserved in the output layer. The default is NOTHING.
7545 : * Setting it to ERROR will cause the function to error out. Setting it
7546 : * to WARNING will cause the function to emit a warning but continue its
7547 : * processing.
7548 : * </li>
7549 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
7550 : * Action to perform when the input field value is not preserved in the
7551 : * output layer.
7552 : * The default is WARNING, which will cause the function to emit a warning
7553 : * but continue its processing.
7554 : * Setting it to ERROR will cause the function to error out if a lossy
7555 : * conversion is detected.
7556 : * </li>
7557 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
7558 : * GetGeometryColumn() is used. The special name
7559 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
7560 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
7561 : * Geometry columns are also identified if they have
7562 : * ARROW:extension:name=ogc.wkb as a field metadata.
7563 : * The corresponding ArrowArray must be of type binary (w) or large
7564 : * binary (W).
7565 : * </li>
7566 : * </ul>
7567 : *
7568 : * The following example demonstrates how to copy a layer from one format to
7569 : * another one (assuming it has at most a single geometry column):
7570 : \code{.py}
7571 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
7572 : stream = src_lyr.GetArrowStream()
7573 : schema = stream.GetSchema()
7574 :
7575 : # If the source layer has a FID column and the output driver supports
7576 : # a FID layer creation option, set it to the source FID column name.
7577 : if src_lyr.GetFIDColumn():
7578 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
7579 : "DS_LAYER_CREATIONOPTIONLIST"
7580 : )
7581 : if creationOptions and '"FID"' in creationOptions:
7582 : lcos["FID"] = src_lyr.GetFIDColumn()
7583 :
7584 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
7585 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
7586 : out_lyr = out_ds.CreateLayer(
7587 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
7588 : )
7589 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
7590 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
7591 : else:
7592 : out_lyr = out_ds.CreateLayer(
7593 : src_lyr.GetName(),
7594 : geom_type=src_lyr.GetGeomType(),
7595 : srs=src_lyr.GetSpatialRef(),
7596 : options=lcos,
7597 : )
7598 :
7599 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
7600 : assert success, error_msg
7601 :
7602 : src_geom_field_names = [
7603 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
7604 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
7605 : ]
7606 : for i in range(schema.GetChildrenCount()):
7607 : # GetArrowStream() may return "OGC_FID" for a unnamed source FID
7608 : # column and "wkb_geometry" for a unnamed source geometry column.
7609 : # Also test GetFIDColumn() and src_geom_field_names if they are
7610 : # named.
7611 : if (
7612 : schema.GetChild(i).GetName()
7613 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
7614 : and schema.GetChild(i).GetName() not in src_geom_field_names
7615 : ):
7616 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
7617 :
7618 : write_options = []
7619 : if src_lyr.GetFIDColumn():
7620 : write_options.append("FID=" + src_lyr.GetFIDColumn())
7621 : if (
7622 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
7623 : and src_lyr.GetGeometryColumn()
7624 : ):
7625 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
7626 :
7627 : while True:
7628 : array = stream.GetNextRecordBatch()
7629 : if array is None:
7630 : break
7631 : out_lyr.WriteArrowBatch(schema, array, write_options)
7632 : \endcode
7633 : *
7634 : * This method and CreateFeature() are mutually exclusive in the same session.
7635 : *
7636 : * This method is the same as the C function OGR_L_WriteArrowBatch().
7637 : *
7638 : * @param schema Schema of array
7639 : * @param array Array of type struct. It may be released (array->release==NULL)
7640 : * after calling this method.
7641 : * @param papszOptions Options. Null terminated list, or nullptr.
7642 : * @return true in case of success
7643 : * @since 3.8
7644 : */
7645 : // clang-format on
7646 :
7647 84 : bool OGRLayer::WriteArrowBatch(const struct ArrowSchema *schema,
7648 : struct ArrowArray *array,
7649 : CSLConstList papszOptions)
7650 : {
7651 84 : const char *format = schema->format;
7652 84 : if (!IsStructure(format))
7653 : {
7654 0 : CPLError(CE_Failure, CPLE_AppDefined,
7655 : "WriteArrowBatch() should be called on a schema that is a "
7656 : "struct of fields");
7657 0 : return false;
7658 : }
7659 :
7660 84 : if (schema->n_children != array->n_children)
7661 : {
7662 0 : CPLError(CE_Failure, CPLE_AppDefined,
7663 : "WriteArrowBatch(): schema->n_children (%d) != "
7664 : "array->n_children (%d)",
7665 0 : int(schema->n_children), int(array->n_children));
7666 0 : return false;
7667 : }
7668 :
7669 168 : CPLStringList aosNativeTypes;
7670 84 : auto poDS = const_cast<OGRLayer *>(this)->GetDataset();
7671 84 : if (poDS)
7672 : {
7673 84 : auto poDriver = poDS->GetDriver();
7674 84 : if (poDriver)
7675 : {
7676 : const char *pszMetadataItem =
7677 84 : poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
7678 84 : if (pszMetadataItem)
7679 84 : aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
7680 : }
7681 : }
7682 :
7683 168 : std::vector<FieldInfo> asFieldInfo;
7684 84 : auto poLayerDefn = GetLayerDefn();
7685 : const char *pszFIDName =
7686 84 : CSLFetchNameValueDef(papszOptions, "FID", GetFIDColumn());
7687 84 : if (!pszFIDName || pszFIDName[0] == 0)
7688 59 : pszFIDName = DEFAULT_ARROW_FID_NAME;
7689 : const bool bErrorIfFIDNotPreserved =
7690 84 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
7691 : "ERROR");
7692 : const bool bWarningIfFIDNotPreserved =
7693 84 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
7694 : "WARNING");
7695 : const bool bErrorIfFieldNotPreserved =
7696 84 : EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FIELD_NOT_PRESERVED", ""),
7697 : "ERROR");
7698 84 : const char *pszGeomFieldName = CSLFetchNameValueDef(
7699 84 : papszOptions, "GEOMETRY_NAME", GetGeometryColumn());
7700 84 : if (!pszGeomFieldName || pszGeomFieldName[0] == 0)
7701 58 : pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
7702 84 : const struct ArrowSchema *schemaFIDColumn = nullptr;
7703 84 : struct ArrowArray *arrayFIDColumn = nullptr;
7704 84 : bool bFallbackTypesUsed = false;
7705 824 : for (int64_t i = 0; i < schema->n_children; ++i)
7706 : {
7707 741 : if (!BuildOGRFieldInfo(schema->children[i], array->children[i],
7708 741 : poLayerDefn, std::string(), aosNativeTypes,
7709 : bFallbackTypesUsed, asFieldInfo, pszFIDName,
7710 : pszGeomFieldName, this,
7711 741 : m_poPrivate->m_oMapArrowFieldNameToOGRFieldName,
7712 : schemaFIDColumn, arrayFIDColumn))
7713 : {
7714 1 : return false;
7715 : }
7716 : }
7717 :
7718 166 : std::map<int, int> oMapOGRFieldIndexToFieldInfoIndex;
7719 166 : std::vector<bool> abUseStringOptim(poLayerDefn->GetFieldCount(), false);
7720 843 : for (int i = 0; i < static_cast<int>(asFieldInfo.size()); ++i)
7721 : {
7722 760 : if (asFieldInfo[i].iOGRFieldIdx >= 0 && !asFieldInfo[i].bIsGeomCol)
7723 : {
7724 651 : CPLAssert(oMapOGRFieldIndexToFieldInfoIndex.find(
7725 : asFieldInfo[i].iOGRFieldIdx) ==
7726 : oMapOGRFieldIndexToFieldInfoIndex.end());
7727 651 : oMapOGRFieldIndexToFieldInfoIndex[asFieldInfo[i].iOGRFieldIdx] = i;
7728 1302 : abUseStringOptim[asFieldInfo[i].iOGRFieldIdx] =
7729 1302 : asFieldInfo[i].bUseStringOptim;
7730 : }
7731 : }
7732 :
7733 166 : OGRFeatureDefn oLayerDefnTmp(poLayerDefn->GetName());
7734 :
7735 : struct LayerDefnTmpRefReleaser
7736 : {
7737 : OGRFeatureDefn &m_oDefn;
7738 :
7739 83 : explicit LayerDefnTmpRefReleaser(OGRFeatureDefn &oDefn) : m_oDefn(oDefn)
7740 : {
7741 83 : m_oDefn.Reference();
7742 83 : }
7743 :
7744 83 : ~LayerDefnTmpRefReleaser()
7745 83 : {
7746 83 : m_oDefn.Dereference();
7747 83 : }
7748 : };
7749 :
7750 166 : LayerDefnTmpRefReleaser oLayerDefnTmpRefReleaser(oLayerDefnTmp);
7751 :
7752 166 : std::vector<int> anIdentityFieldMap;
7753 83 : if (bFallbackTypesUsed)
7754 : {
7755 29 : oLayerDefnTmp.SetGeomType(wkbNone);
7756 98 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
7757 : {
7758 69 : anIdentityFieldMap.push_back(i);
7759 69 : const auto poSrcFieldDefn = poLayerDefn->GetFieldDefn(i);
7760 69 : const auto oIter = oMapOGRFieldIndexToFieldInfoIndex.find(i);
7761 : OGRFieldDefn oFieldDefn(
7762 : poSrcFieldDefn->GetNameRef(),
7763 69 : oIter == oMapOGRFieldIndexToFieldInfoIndex.end()
7764 1 : ? poSrcFieldDefn->GetType()
7765 139 : : asFieldInfo[oIter->second].eNominalFieldType);
7766 69 : if (oIter != oMapOGRFieldIndexToFieldInfoIndex.end())
7767 68 : asFieldInfo[oIter->second].eSetFeatureFieldType =
7768 68 : asFieldInfo[oIter->second].eNominalFieldType;
7769 69 : oLayerDefnTmp.AddFieldDefn(&oFieldDefn);
7770 : }
7771 57 : for (int i = 0; i < poLayerDefn->GetGeomFieldCount(); ++i)
7772 : {
7773 28 : oLayerDefnTmp.AddGeomFieldDefn(poLayerDefn->GetGeomFieldDefn(i));
7774 : }
7775 : }
7776 : else
7777 : {
7778 715 : for (auto &sFieldInfo : asFieldInfo)
7779 661 : sFieldInfo.eSetFeatureFieldType = sFieldInfo.eTargetFieldType;
7780 : }
7781 :
7782 : struct FeatureCleaner
7783 : {
7784 : OGRFeature &m_oFeature;
7785 : const std::vector<bool> &m_abUseStringOptim;
7786 :
7787 83 : explicit FeatureCleaner(OGRFeature &oFeature,
7788 : const std::vector<bool> &abUseStringOptim)
7789 83 : : m_oFeature(oFeature), m_abUseStringOptim(abUseStringOptim)
7790 : {
7791 83 : }
7792 :
7793 : // As we set a value that can't be CPLFree()'d in the .String member
7794 : // of string fields, we must take care of manually unsetting it before
7795 : // the destructor of OGRFeature gets called.
7796 83 : ~FeatureCleaner()
7797 83 : {
7798 83 : const auto poLayerDefn = m_oFeature.GetDefnRef();
7799 83 : const int nFieldCount = poLayerDefn->GetFieldCount();
7800 738 : for (int i = 0; i < nFieldCount; ++i)
7801 : {
7802 655 : if (m_abUseStringOptim[i])
7803 : {
7804 128 : if (m_oFeature.IsFieldSetAndNotNullUnsafe(i))
7805 100 : m_oFeature.SetFieldSameTypeUnsafe(
7806 : i, static_cast<char *>(nullptr));
7807 : }
7808 : }
7809 83 : }
7810 : };
7811 :
7812 166 : OGRFeature oFeature(bFallbackTypesUsed ? &oLayerDefnTmp : poLayerDefn);
7813 166 : FeatureCleaner oCleaner(oFeature, abUseStringOptim);
7814 166 : OGRFeature oFeatureTarget(poLayerDefn);
7815 83 : OGRFeature *const poFeatureTarget =
7816 83 : bFallbackTypesUsed ? &oFeatureTarget : &oFeature;
7817 :
7818 : // We accumulate the content of all strings in osWorkingBuffer to avoid
7819 : // a few dynamic memory allocations
7820 166 : std::string osWorkingBuffer;
7821 :
7822 : bool bTransactionOK;
7823 : {
7824 83 : CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler);
7825 83 : bTransactionOK = StartTransaction() == OGRERR_NONE;
7826 : }
7827 :
7828 166 : const std::string emptyString;
7829 83 : int64_t fidNullCount = 0;
7830 60731 : for (size_t iFeature = 0; iFeature < static_cast<size_t>(array->length);
7831 : ++iFeature)
7832 : {
7833 60658 : oFeature.SetFID(OGRNullFID);
7834 :
7835 60658 : int iArrowIdx = 0;
7836 60658 : const size_t nWorkingBufferSize = GetWorkingBufferSize(
7837 : schema, array, iFeature, iArrowIdx, asFieldInfo);
7838 60658 : osWorkingBuffer.clear();
7839 60658 : osWorkingBuffer.reserve(nWorkingBufferSize);
7840 : #ifdef DEBUG
7841 60658 : const char *pszWorkingBuffer = osWorkingBuffer.c_str();
7842 60658 : CPL_IGNORE_RET_VAL(pszWorkingBuffer);
7843 : #endif
7844 60658 : iArrowIdx = 0;
7845 1383780 : for (int64_t i = 0; i < schema->n_children; ++i)
7846 : {
7847 1323130 : if (!FillFeature(this, schema->children[i], array->children[i],
7848 : emptyString, iFeature, iArrowIdx, asFieldInfo,
7849 : oFeature, osWorkingBuffer))
7850 : {
7851 2 : if (bTransactionOK)
7852 2 : RollbackTransaction();
7853 10 : return false;
7854 : }
7855 : }
7856 : #ifdef DEBUG
7857 : // Check that the buffer didn't get reallocated
7858 60656 : CPLAssert(pszWorkingBuffer == osWorkingBuffer.c_str());
7859 60656 : CPLAssert(osWorkingBuffer.size() == nWorkingBufferSize);
7860 : #endif
7861 :
7862 60656 : if (bFallbackTypesUsed)
7863 : {
7864 44 : oFeatureTarget.SetFrom(&oFeature, anIdentityFieldMap.data(),
7865 : /*bForgiving=*/true,
7866 : /*bUseISO8601ForDateTimeAsString=*/true);
7867 44 : oFeatureTarget.SetFID(oFeature.GetFID());
7868 :
7869 44 : if (bErrorIfFieldNotPreserved)
7870 : {
7871 26 : for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
7872 : {
7873 16 : if (!oFeature.IsFieldSetAndNotNullUnsafe(i))
7874 : {
7875 4 : continue;
7876 : }
7877 12 : bool bLossyConversion = false;
7878 : const auto eSrcType =
7879 12 : oLayerDefnTmp.GetFieldDefnUnsafe(i)->GetType();
7880 : const auto eDstType =
7881 12 : poLayerDefn->GetFieldDefnUnsafe(i)->GetType();
7882 :
7883 : const auto IsDoubleCastToInt64EqualTInt64 =
7884 2 : [](double dfVal, int64_t nOtherVal)
7885 : {
7886 : // Values in the range [INT64_MAX - 1023, INT64_MAX - 1]
7887 : // get converted to a double that once cast to int64_t
7888 : // is INT64_MAX + 1, hence the strict < comparison
7889 : return dfVal >=
7890 2 : static_cast<double>(
7891 2 : std::numeric_limits<int64_t>::min()) &&
7892 : dfVal <
7893 2 : static_cast<double>(
7894 4 : std::numeric_limits<int64_t>::max()) &&
7895 3 : static_cast<int64_t>(dfVal) == nOtherVal;
7896 : };
7897 :
7898 14 : if (eSrcType == OFTInteger64 && eDstType == OFTInteger &&
7899 2 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
7900 2 : oFeature.GetFieldAsInteger64Unsafe(i))
7901 : {
7902 1 : bLossyConversion = true;
7903 : }
7904 14 : else if (eSrcType == OFTReal && eDstType == OFTInteger &&
7905 3 : oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
7906 3 : oFeature.GetFieldAsDoubleUnsafe(i))
7907 : {
7908 2 : bLossyConversion = true;
7909 : }
7910 12 : else if (eSrcType == OFTReal && eDstType == OFTInteger64 &&
7911 3 : static_cast<double>(
7912 3 : oFeatureTarget.GetFieldAsInteger64Unsafe(i)) !=
7913 3 : oFeature.GetFieldAsDoubleUnsafe(i))
7914 : {
7915 2 : bLossyConversion = true;
7916 : }
7917 9 : else if (eSrcType == OFTInteger64 && eDstType == OFTReal &&
7918 2 : !IsDoubleCastToInt64EqualTInt64(
7919 : oFeatureTarget.GetFieldAsDoubleUnsafe(i),
7920 2 : oFeature.GetFieldAsInteger64Unsafe(i)))
7921 : {
7922 1 : bLossyConversion = true;
7923 : }
7924 12 : if (bLossyConversion)
7925 : {
7926 6 : CPLError(CE_Failure, CPLE_AppDefined,
7927 : "For feature " CPL_FRMT_GIB
7928 : ", value of field %s cannot not preserved",
7929 : oFeatureTarget.GetFID(),
7930 : oLayerDefnTmp.GetFieldDefn(i)->GetNameRef());
7931 6 : if (bTransactionOK)
7932 6 : RollbackTransaction();
7933 6 : return false;
7934 : }
7935 : }
7936 : }
7937 : }
7938 :
7939 60650 : const auto nInputFID = poFeatureTarget->GetFID();
7940 60650 : if (CreateFeature(poFeatureTarget) != OGRERR_NONE)
7941 : {
7942 1 : if (bTransactionOK)
7943 1 : RollbackTransaction();
7944 1 : return false;
7945 : }
7946 60649 : if (nInputFID != OGRNullFID)
7947 : {
7948 121109 : if (bWarningIfFIDNotPreserved &&
7949 : // cppcheck-suppress knownConditionTrueFalse
7950 60543 : poFeatureTarget->GetFID() != nInputFID)
7951 : {
7952 2 : CPLError(CE_Warning, CPLE_AppDefined,
7953 : "Feature id " CPL_FRMT_GIB " not preserved",
7954 : nInputFID);
7955 : }
7956 60565 : else if (bErrorIfFIDNotPreserved &&
7957 : // cppcheck-suppress knownConditionTrueFalse
7958 1 : poFeatureTarget->GetFID() != nInputFID)
7959 : {
7960 1 : CPLError(CE_Failure, CPLE_AppDefined,
7961 : "Feature id " CPL_FRMT_GIB " not preserved",
7962 : nInputFID);
7963 1 : if (bTransactionOK)
7964 1 : RollbackTransaction();
7965 1 : return false;
7966 : }
7967 : }
7968 :
7969 60648 : if (arrayFIDColumn)
7970 : {
7971 60570 : uint8_t *pabyValidity = static_cast<uint8_t *>(
7972 60570 : const_cast<void *>(arrayFIDColumn->buffers[0]));
7973 60570 : if (IsInt32(schemaFIDColumn->format))
7974 : {
7975 6 : auto *panValues = static_cast<int32_t *>(
7976 6 : const_cast<void *>(arrayFIDColumn->buffers[1]));
7977 6 : if (poFeatureTarget->GetFID() >
7978 6 : std::numeric_limits<int32_t>::max())
7979 : {
7980 0 : if (pabyValidity)
7981 : {
7982 0 : ++fidNullCount;
7983 0 : UnsetBit(pabyValidity,
7984 0 : static_cast<size_t>(iFeature +
7985 0 : arrayFIDColumn->offset));
7986 : }
7987 0 : CPLError(CE_Warning, CPLE_AppDefined,
7988 : "FID " CPL_FRMT_GIB
7989 : " cannot be stored in FID array of type int32",
7990 : poFeatureTarget->GetFID());
7991 : }
7992 : else
7993 : {
7994 6 : if (pabyValidity)
7995 : {
7996 5 : SetBit(pabyValidity,
7997 5 : static_cast<size_t>(iFeature +
7998 5 : arrayFIDColumn->offset));
7999 : }
8000 6 : panValues[iFeature + arrayFIDColumn->offset] =
8001 6 : static_cast<int32_t>(poFeatureTarget->GetFID());
8002 : }
8003 : }
8004 60564 : else if (IsInt64(schemaFIDColumn->format))
8005 : {
8006 60564 : if (pabyValidity)
8007 : {
8008 0 : SetBit(
8009 : pabyValidity,
8010 0 : static_cast<size_t>(iFeature + arrayFIDColumn->offset));
8011 : }
8012 60564 : auto *panValues = static_cast<int64_t *>(
8013 60564 : const_cast<void *>(arrayFIDColumn->buffers[1]));
8014 60564 : panValues[iFeature + arrayFIDColumn->offset] =
8015 60564 : poFeatureTarget->GetFID();
8016 : }
8017 : else
8018 : {
8019 0 : CPLAssert(false);
8020 : }
8021 : }
8022 : }
8023 73 : if (arrayFIDColumn && arrayFIDColumn->buffers[0])
8024 : {
8025 1 : arrayFIDColumn->null_count = fidNullCount;
8026 : }
8027 :
8028 73 : bool bRet = true;
8029 73 : if (bTransactionOK)
8030 66 : bRet = CommitTransaction() == OGRERR_NONE;
8031 :
8032 73 : return bRet;
8033 : }
8034 :
8035 : /************************************************************************/
8036 : /* OGR_L_WriteArrowBatch() */
8037 : /************************************************************************/
8038 :
8039 : // clang-format off
8040 : /** Writes a batch of rows from an ArrowArray.
8041 : *
8042 : * This is semantically close to calling CreateFeature() with multiple features
8043 : * at once.
8044 : *
8045 : * The ArrowArray must be of type struct (format=+s), and its children generally
8046 : * map to an OGR attribute or geometry field (unless they are struct themselves).
8047 : *
8048 : * Method IsArrowSchemaSupported() can be called to determine if the schema
8049 : * will be supported by WriteArrowBatch().
8050 : *
8051 : * OGR fields for the corresponding children arrays must exist and be of a
8052 : * compatible type. For attribute fields, they should generally be created with
8053 : * CreateFieldFromArrowSchema(). This is strictly required for output drivers
8054 : * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
8055 : * they should be created either implicitly at CreateLayer() time
8056 : * (if geom_type != wkbNone), or explicitly with CreateGeomField().
8057 : *
8058 : * Starting with GDAL 3.9, some tolerance has been introduced in the base
8059 : * implementation of WriteArrowBatch() for scenarios that involve appending to
8060 : * an already existing output layer when the input Arrow field type and the
8061 : * OGR layer field type are 32/64-bit integers or real numbers, but do not match
8062 : * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
8063 : * can be used to control the behavior in case of lossy conversion.
8064 : *
8065 : * Arrays for geometry columns should be of binary or large binary type and
8066 : * contain WKB geometry.
8067 : *
8068 : * Note that the passed array may be set to a released state
8069 : * (array->release==NULL) after this call (not by the base implementation,
8070 : * but in specialized ones such as Parquet or Arrow for example)
8071 : *
8072 : * Supported options of the base implementation are:
8073 : * <ul>
8074 : * <li>FID=name. Name of the FID column in the array. If not provided,
8075 : * GetFIDColumn() is used to determine it. The special name
8076 : * OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
8077 : * GetFIDColumn() are set.
8078 : * The corresponding ArrowArray must be of type int32 (i) or int64 (l).
8079 : * On input, values of the FID column are used to create the feature.
8080 : * On output, the values of the FID column may be set with the FID of the
8081 : * created feature (if the array is not released).
8082 : * </li>
8083 : * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
8084 : * input FID is not preserved in the output layer. The default is NOTHING.
8085 : * Setting it to ERROR will cause the function to error out. Setting it
8086 : * to WARNING will cause the function to emit a warning but continue its
8087 : * processing.
8088 : * </li>
8089 : * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
8090 : * Action to perform when the input field value is not preserved in the
8091 : * output layer.
8092 : * The default is WARNING, which will cause the function to emit a warning
8093 : * but continue its processing.
8094 : * Setting it to ERROR will cause the function to error out if a lossy
8095 : * conversion is detected.
8096 : * </li>
8097 : * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
8098 : * GetGeometryColumn() is used. The special name
8099 : * OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
8100 : * GEOMETRY_NAME nor GetGeometryColumn() are set.
8101 : * Geometry columns are also identified if they have
8102 : * ARROW:extension:name=ogc.wkb as a field metadata.
8103 : * The corresponding ArrowArray must be of type binary (w) or large
8104 : * binary (W).
8105 : * </li>
8106 : * </ul>
8107 : *
8108 : * The following example demonstrates how to copy a layer from one format to
8109 : * another one (assuming it has at most a single geometry column):
8110 : \code{.py}
8111 : def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
8112 : stream = src_lyr.GetArrowStream()
8113 : schema = stream.GetSchema()
8114 :
8115 : # If the source layer has a FID column and the output driver supports
8116 : # a FID layer creation option, set it to the source FID column name.
8117 : if src_lyr.GetFIDColumn():
8118 : creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
8119 : "DS_LAYER_CREATIONOPTIONLIST"
8120 : )
8121 : if creationOptions and '"FID"' in creationOptions:
8122 : lcos["FID"] = src_lyr.GetFIDColumn()
8123 :
8124 : with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
8125 : if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
8126 : out_lyr = out_ds.CreateLayer(
8127 : src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
8128 : )
8129 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
8130 : out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
8131 : else:
8132 : out_lyr = out_ds.CreateLayer(
8133 : src_lyr.GetName(),
8134 : geom_type=src_lyr.GetGeomType(),
8135 : srs=src_lyr.GetSpatialRef(),
8136 : options=lcos,
8137 : )
8138 :
8139 : success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
8140 : assert success, error_msg
8141 :
8142 : src_geom_field_names = [
8143 : src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
8144 : for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
8145 : ]
8146 : for i in range(schema.GetChildrenCount()):
8147 : # GetArrowStream() may return "OGC_FID" for an unnamed source FID
8148 : # column and "wkb_geometry" for an unnamed source geometry column.
8149 : # Also test GetFIDColumn() and src_geom_field_names if they are
8150 : # named.
8151 : if (
8152 : schema.GetChild(i).GetName()
8153 : not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
8154 : and schema.GetChild(i).GetName() not in src_geom_field_names
8155 : ):
8156 : out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
8157 :
8158 : write_options = []
8159 : if src_lyr.GetFIDColumn():
8160 : write_options.append("FID=" + src_lyr.GetFIDColumn())
8161 : if (
8162 : src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
8163 : and src_lyr.GetGeometryColumn()
8164 : ):
8165 : write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
8166 :
8167 : while True:
8168 : array = stream.GetNextRecordBatch()
8169 : if array is None:
8170 : break
8171 : out_lyr.WriteArrowBatch(schema, array, write_options)
8172 : \endcode
8173 : *
8174 : * This method and CreateFeature() are mutually exclusive in the same session.
8175 : *
8176 : * This method is the same as the C++ method OGRLayer::WriteArrowBatch().
8177 : *
8178 : * @param hLayer Layer.
8179 : * @param schema Schema of array.
8180 : * @param array Array of type struct. It may be released (array->release==NULL)
8181 : * after calling this method.
8182 : * @param papszOptions Options. Null terminated list, or nullptr.
8183 : * @return true in case of success
8184 : * @since 3.8
8185 : */
8186 : // clang-format on
8187 :
8188 59 : bool OGR_L_WriteArrowBatch(OGRLayerH hLayer, const struct ArrowSchema *schema,
8189 : struct ArrowArray *array, char **papszOptions)
8190 : { // C API entry point that delegates to OGRLayer::WriteArrowBatch().
8191 59 : VALIDATE_POINTER1(hLayer, __func__, false); // NOTE(review): presumably reports an error and returns false when the pointer is null - confirm against the macro definition
8192 59 : VALIDATE_POINTER1(schema, __func__, false);
8193 59 : VALIDATE_POINTER1(array, __func__, false);
8194 : // Resolve the handle through OGRLayer::FromHandle() and forward schema, array and options unchanged.
8195 118 : return OGRLayer::FromHandle(hLayer)->WriteArrowBatch(schema, array,
8196 59 : papszOptions);
8197 : }
|