Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: OpenGIS Simple Features Reference Implementation
4 : * Purpose: Helper to fill ArrowArray
5 : * Author: Even Rouault <even dot rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #pragma once
14 :
15 : //! @cond Doxygen_Suppress
16 :
17 : #include <algorithm>
18 : #include <limits>
19 :
20 : #include "cpl_time.h"
21 :
22 : #include "ogrsf_frmts.h"
23 : #include "ogr_recordbatch.h"
24 :
25 : class CPL_DLL OGRArrowArrayHelper
26 : {
27 : OGRArrowArrayHelper(const OGRArrowArrayHelper &) = delete;
28 : OGRArrowArrayHelper &operator=(const OGRArrowArrayHelper &) = delete;
29 :
30 : public:
31 : bool m_bIncludeFID = false;
32 : int m_nMaxBatchSize = 0;
33 : int m_nChildren = 0;
34 : const int m_nFieldCount = 0;
35 : const int m_nGeomFieldCount = 0;
36 : std::vector<int> m_mapOGRFieldToArrowField{};
37 : std::vector<int> m_mapOGRGeomFieldToArrowField{};
38 : std::vector<bool> m_abNullableFields{};
39 : std::vector<uint32_t> m_anArrowFieldMaxAlloc{};
40 : std::vector<int> m_anTZFlags{};
41 : int64_t *m_panFIDValues = nullptr;
42 : struct ArrowArray *m_out_array = nullptr;
43 :
44 : static uint32_t GetMemLimit();
45 :
46 : static int
47 : GetMaxFeaturesInBatch(const CPLStringList &aosArrowArrayStreamOptions);
48 :
49 : OGRArrowArrayHelper(GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn,
50 : const CPLStringList &aosArrowArrayStreamOptions,
51 : struct ArrowArray *out_array);
52 :
53 1667 : bool SetNull(int iArrowField, int iFeat)
54 : {
55 1667 : auto psArray = m_out_array->children[iArrowField];
56 1667 : ++psArray->null_count;
57 1667 : uint8_t *pabyNull =
58 1667 : static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[0]));
59 1667 : if (psArray->buffers[0] == nullptr)
60 : {
61 : pabyNull = static_cast<uint8_t *>(
62 735 : VSI_MALLOC_ALIGNED_AUTO_VERBOSE((m_nMaxBatchSize + 7) / 8));
63 735 : if (pabyNull == nullptr)
64 : {
65 0 : return false;
66 : }
67 735 : memset(pabyNull, 0xFF, (m_nMaxBatchSize + 7) / 8);
68 735 : psArray->buffers[0] = pabyNull;
69 : }
70 1667 : pabyNull[iFeat / 8] &= static_cast<uint8_t>(~(1 << (iFeat % 8)));
71 :
72 1667 : if (psArray->n_buffers == 3)
73 : {
74 1427 : auto panOffsets =
75 1427 : static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
76 1427 : panOffsets[iFeat + 1] = panOffsets[iFeat];
77 : }
78 1667 : return true;
79 : }
80 :
81 151 : inline static void SetBoolOn(struct ArrowArray *psArray, int iFeat)
82 : {
83 : static_cast<uint8_t *>(
84 151 : const_cast<void *>(psArray->buffers[1]))[iFeat / 8] |=
85 151 : static_cast<uint8_t>(1 << (iFeat % 8));
86 151 : }
87 :
88 0 : inline static void SetInt8(struct ArrowArray *psArray, int iFeat,
89 : int8_t nVal)
90 : {
91 0 : static_cast<int8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
92 : nVal;
93 0 : }
94 :
95 0 : inline static void SetUInt8(struct ArrowArray *psArray, int iFeat,
96 : uint8_t nVal)
97 : {
98 0 : static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
99 : nVal;
100 0 : }
101 :
102 132 : inline static void SetInt16(struct ArrowArray *psArray, int iFeat,
103 : int16_t nVal)
104 : {
105 132 : static_cast<int16_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
106 : nVal;
107 132 : }
108 :
109 : inline static void SetUInt16(struct ArrowArray *psArray, int iFeat,
110 : uint16_t nVal)
111 : {
112 : static_cast<uint16_t *>(
113 : const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
114 : }
115 :
116 1660 : inline static void SetInt32(struct ArrowArray *psArray, int iFeat,
117 : int32_t nVal)
118 : {
119 1660 : static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
120 : nVal;
121 1660 : }
122 :
123 : inline static void SetUInt32(struct ArrowArray *psArray, int iFeat,
124 : uint32_t nVal)
125 : {
126 : static_cast<uint32_t *>(
127 : const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
128 : }
129 :
130 181 : inline static void SetInt64(struct ArrowArray *psArray, int iFeat,
131 : int64_t nVal)
132 : {
133 181 : static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
134 : nVal;
135 181 : }
136 :
137 : inline static void SetUInt64(struct ArrowArray *psArray, int iFeat,
138 : uint64_t nVal)
139 : {
140 : static_cast<uint64_t *>(
141 : const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
142 : }
143 :
144 111 : inline static void SetFloat(struct ArrowArray *psArray, int iFeat,
145 : float fVal)
146 : {
147 111 : static_cast<float *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
148 : fVal;
149 111 : }
150 :
151 221 : inline static void SetDouble(struct ArrowArray *psArray, int iFeat,
152 : double dfVal)
153 : {
154 221 : static_cast<double *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
155 : dfVal;
156 221 : }
157 :
158 91 : static void SetDate(struct ArrowArray *psArray, int iFeat,
159 : struct tm &brokenDown, const OGRField &ogrField)
160 : {
161 91 : brokenDown.tm_year = ogrField.Date.Year - 1900;
162 91 : brokenDown.tm_mon = ogrField.Date.Month - 1;
163 91 : brokenDown.tm_mday = ogrField.Date.Day;
164 91 : brokenDown.tm_hour = 0;
165 91 : brokenDown.tm_min = 0;
166 91 : brokenDown.tm_sec = 0;
167 91 : static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
168 91 : static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
169 91 : }
170 :
171 115 : static void SetDateTime(struct ArrowArray *psArray, int iFeat,
172 : struct tm &brokenDown, int nFieldTZFlag,
173 : const OGRField &ogrField)
174 : {
175 115 : brokenDown.tm_year = ogrField.Date.Year - 1900;
176 115 : brokenDown.tm_mon = ogrField.Date.Month - 1;
177 115 : brokenDown.tm_mday = ogrField.Date.Day;
178 115 : brokenDown.tm_hour = ogrField.Date.Hour;
179 115 : brokenDown.tm_min = ogrField.Date.Minute;
180 115 : brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second);
181 : auto nVal =
182 115 : CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
183 115 : (static_cast<int>(ogrField.Date.Second * 1000 + 0.5) % 1000);
184 115 : if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
185 95 : ogrField.Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
186 : {
187 : // Convert for ogrField.Date.TZFlag to UTC
188 95 : const int TZOffset = (ogrField.Date.TZFlag - OGR_TZFLAG_UTC) * 15;
189 95 : const int TZOffsetMS = TZOffset * 60 * 1000;
190 95 : nVal -= TZOffsetMS;
191 : }
192 115 : static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
193 : nVal;
194 115 : }
195 :
196 1265050 : GByte *GetPtrForStringOrBinary(int iArrowField, int iFeat, size_t nLen)
197 : {
198 1265050 : auto psArray = m_out_array->children[iArrowField];
199 1265050 : auto panOffsets =
200 1265050 : static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
201 1265050 : const uint32_t nCurLength = static_cast<uint32_t>(panOffsets[iFeat]);
202 1265050 : if (nLen > m_anArrowFieldMaxAlloc[iArrowField] - nCurLength)
203 : {
204 404 : if (nLen >
205 404 : static_cast<uint32_t>(std::numeric_limits<int32_t>::max()) -
206 : nCurLength)
207 : {
208 0 : CPLError(CE_Failure, CPLE_AppDefined,
209 : "Too large string or binary content");
210 0 : return nullptr;
211 : }
212 404 : uint32_t nNewSize = nCurLength + static_cast<uint32_t>(nLen);
213 404 : if ((m_anArrowFieldMaxAlloc[iArrowField] >> 31) == 0)
214 : {
215 : const uint32_t nDoubleSize =
216 404 : 2U * m_anArrowFieldMaxAlloc[iArrowField];
217 404 : if (nNewSize < nDoubleSize)
218 0 : nNewSize = nDoubleSize;
219 : }
220 404 : void *newBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nNewSize);
221 404 : if (newBuffer == nullptr)
222 0 : return nullptr;
223 404 : m_anArrowFieldMaxAlloc[iArrowField] = nNewSize;
224 404 : memcpy(newBuffer, psArray->buffers[2], nCurLength);
225 404 : VSIFreeAligned(const_cast<void *>(psArray->buffers[2]));
226 403 : psArray->buffers[2] = newBuffer;
227 : }
228 1265050 : GByte *paby =
229 1265050 : static_cast<GByte *>(const_cast<void *>(psArray->buffers[2])) +
230 1265050 : nCurLength;
231 1265050 : panOffsets[iFeat + 1] = panOffsets[iFeat] + static_cast<int32_t>(nLen);
232 1265050 : return paby;
233 : }
234 :
235 0 : static void SetEmptyStringOrBinary(struct ArrowArray *psArray, int iFeat)
236 : {
237 0 : auto panOffsets =
238 0 : static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
239 0 : panOffsets[iFeat + 1] = panOffsets[iFeat];
240 0 : }
241 :
242 389 : void Shrink(int nFeatures)
243 : {
244 389 : if (nFeatures < m_nMaxBatchSize)
245 : {
246 318 : m_out_array->length = nFeatures;
247 2587 : for (int i = 0; i < m_nChildren; i++)
248 : {
249 2269 : m_out_array->children[i]->length = nFeatures;
250 : }
251 : }
252 389 : }
253 :
254 17 : void ClearArray()
255 : {
256 17 : if (m_out_array->release)
257 16 : m_out_array->release(m_out_array);
258 17 : memset(m_out_array, 0, sizeof(*m_out_array));
259 17 : }
260 :
261 : static bool FillDict(struct ArrowArray *psChild,
262 : const OGRCodedFieldDomain *poCodedDomain);
263 : };
264 :
265 : //! @endcond
|