Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL TileDB Driver
4 : * Purpose: Include tiledb headers
5 : * Author: TileDB, Inc
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2019, TileDB, Inc
9 : *
10 : * Permission is hereby granted, free of charge, to any person obtaining a
11 : * copy of this software and associated documentation files (the "Software"),
12 : * to deal in the Software without restriction, including without limitation
13 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 : * and/or sell copies of the Software, and to permit persons to whom the
15 : * Software is furnished to do so, subject to the following conditions:
16 : *
17 : * The above copyright notice and this permission notice shall be included
18 : * in all copies or substantial portions of the Software.
19 : *
20 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 : * DEALINGS IN THE SOFTWARE.
27 : ****************************************************************************/
28 :
29 : #ifndef TILEDB_HEADERS_H
30 : #define TILEDB_HEADERS_H
31 :
32 : #include <algorithm>
33 : #include <list>
34 : #include <variant>
35 :
36 : #include "cpl_port.h"
37 : #include "cpl_string.h"
38 : #include "gdal_frmts.h"
39 : #include "gdal_pam.h"
40 : #include "ogrsf_frmts.h"
41 :
42 : #include "include_tiledb.h"
43 :
44 : #if TILEDB_VERSION_MAJOR > 2 || \
45 : (TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 17)
/**
 * Minimal growable array of bool backed by one contiguous buffer.
 *
 * Unlike std::vector<bool>, which is a packed-bit specialization, elements
 * here are real bool objects, so data() yields a plain bool* to size()
 * elements. Only the operations declared below are provided.
 *
 * The buffer is managed with std::realloc()/std::free(). The type is
 * move-constructible only: copying and all assignments are deleted.
 */
struct gdal_tiledb_vector_of_bool
{
    size_t m_size = 0;      // number of elements in use
    size_t m_capacity = 0;  // number of elements m_v has room for
    bool *m_v = nullptr;    // buffer owned by this object (nullptr if none)

    gdal_tiledb_vector_of_bool() = default;

    ~gdal_tiledb_vector_of_bool()
    {
        std::free(m_v);  // free(nullptr) is a no-op
    }

    /**
     * Steals the buffer of \p other, leaving it empty (size 0, capacity 0,
     * null data pointer). Only plain scalars are copied, so this cannot
     * throw and is declared noexcept.
     */
    gdal_tiledb_vector_of_bool(gdal_tiledb_vector_of_bool &&other) noexcept
        : m_size(other.m_size), m_capacity(other.m_capacity), m_v(other.m_v)
    {
        other.m_size = 0;
        other.m_capacity = 0;
        other.m_v = nullptr;
    }

    gdal_tiledb_vector_of_bool(const gdal_tiledb_vector_of_bool &) = delete;
    gdal_tiledb_vector_of_bool &
    operator=(const gdal_tiledb_vector_of_bool &) = delete;
    gdal_tiledb_vector_of_bool &
    operator=(gdal_tiledb_vector_of_bool &&) = delete;

    /** Number of elements currently in use. */
    size_t size() const
    {
        return m_size;
    }

    /** Pointer to the first element; nullptr if nothing was ever allocated. */
    const bool *data() const
    {
        return m_v;
    }

    /** Pointer to the first element; nullptr if nothing was ever allocated. */
    bool *data()
    {
        return m_v;
    }

    /** Unchecked element access: \p idx must be < size(). */
    bool &operator[](size_t idx)
    {
        return m_v[idx];
    }

    /** Unchecked element access: \p idx must be < size(). */
    bool operator[](size_t idx) const
    {
        return m_v[idx];
    }

    /**
     * Sets the number of elements to \p new_size.
     *
     * Growing beyond the capacity reallocates to
     * max(new_size, 2 * capacity). Elements added by a grow are set to
     * false. Shrinking keeps the allocation.
     *
     * @throws std::bad_alloc if the reallocation fails; the container is
     *         left unchanged in that case.
     */
    void resize(size_t new_size)
    {
        if (new_size > m_capacity)
        {
            // If 2 * m_capacity wraps around, std::max still yields at least
            // new_size, so the request stays large enough.
            const size_t new_capacity =
                std::max<size_t>(new_size, 2 * m_capacity);
            bool *new_v = static_cast<bool *>(
                std::realloc(m_v, new_capacity * sizeof(bool)));
            if (!new_v)
            {
                // On failure realloc() leaves the old buffer valid, and m_v
                // still points to it.
                throw std::bad_alloc();
            }
            m_v = new_v;
            m_capacity = new_capacity;
        }
        if (new_size > m_size)
        {
            // Newly exposed elements (including ones left over from before a
            // previous shrink) must read as false.
            std::fill_n(m_v + m_size, new_size - m_size, false);
        }
        m_size = new_size;
    }

    /** Sets the size to 0; the capacity is retained. */
    void clear()
    {
        resize(0);
    }

    /** Number of elements the current allocation can hold. */
    size_t capacity() const
    {
        return m_capacity;
    }

    /** Appends one element; any non-zero \p v is stored as true. */
    void push_back(uint8_t v)
    {
        resize(size() + 1);
        m_v[size() - 1] = static_cast<bool>(v);
    }
};
135 :
136 : #define VECTOR_OF_BOOL gdal_tiledb_vector_of_bool
137 : #define VECTOR_OF_BOOL_IS_NOT_UINT8_T
138 : #else
139 : #define VECTOR_OF_BOOL std::vector<uint8_t>
140 : #endif
141 :
/** Interleave (index) mode identifiers; the numeric values are explicit. */
enum TILEDB_INTERLEAVE_MODE
{
    BAND = 0,
    PIXEL = 1,
    ATTRIBUTES = 2
};
148 :
// Initial value of OGRTileDBLayer::m_nTileCapacity.
#define DEFAULT_TILE_CAPACITY 10000

// Initial value of OGRTileDBLayer::m_nBatchSize.
#define DEFAULT_BATCH_SIZE 500000

// NOTE(review): presumably the name of the TileDB attribute holding raster
// values; its use is not visible in this header, so confirm.
constexpr const char *TILEDB_VALUES = "TDB_VALUES";

// NOTE(review): presumably the key or attribute name under which GDAL-specific
// metadata is stored; its use is not visible in this header, so confirm.
constexpr const char *GDAL_ATTRIBUTE_NAME = "_gdal";
156 :
157 : /************************************************************************/
158 : /* ==================================================================== */
159 : /* TileDBRasterBand */
160 : /* ==================================================================== */
161 : /************************************************************************/
162 :
163 : class TileDBRasterBand;
164 :
165 : /************************************************************************/
166 : /* ==================================================================== */
167 : /* TileDBDataset */
168 : /* ==================================================================== */
169 : /************************************************************************/
170 :
/**
 * Common base class of the TileDB datasets declared in this header:
 * TileDBRasterDataset and OGRTileDBDataset both derive from it.
 *
 * It holds a TileDB context and declares static helpers and entry points.
 * Their definitions are not in this header.
 */
class TileDBDataset : public GDALPamDataset
{
  protected:
    // TileDB context owned by the dataset.
    std::unique_ptr<tiledb::Context> m_ctx;

  public:
    // Static helper taking a filter list, a filter name and a level.
    // NOTE(review): presumably appends the named filter to filterList;
    // the definition is not visible here, so confirm.
    static CPLErr AddFilter(tiledb::Context &ctx,
                            tiledb::FilterList &filterList,
                            const char *pszFilterName, const int level);
    static int Identify(GDALOpenInfo *);
    static CPLErr Delete(const char *pszFilename);
    // NOTE(review): judging by the name, converts a GDAL VSI path into a
    // TileDB URI; confirm against the definition.
    static CPLString VSI_to_tiledb_uri(const char *pszUri);

    // Classic (2D raster / vector) open, create and copy entry points.
    static GDALDataset *Open(GDALOpenInfo *);
    static GDALDataset *Create(const char *pszFilename, int nXSize, int nYSize,
                               int nBands, GDALDataType eType,
                               char **papszOptions);
    static GDALDataset *CreateCopy(const char *pszFilename,
                                   GDALDataset *poSrcDS, int bStrict,
                                   char **papszOptions,
                                   GDALProgressFunc pfnProgress,
                                   void *pProgressData);

    // Multidimensional counterparts of Open() and Create().
    static GDALDataset *OpenMultiDimensional(GDALOpenInfo *);
    static GDALDataset *
    CreateMultiDimensional(const char *pszFilename,
                           CSLConstList papszRootGroupOptions,
                           CSLConstList papszOptions);
};
200 :
201 : /************************************************************************/
202 : /* ==================================================================== */
203 : /* TileDBRasterDataset */
204 : /* ==================================================================== */
205 : /************************************************************************/
206 :
207 : class TileDBRasterDataset final : public TileDBDataset
208 : {
209 : friend class TileDBRasterBand;
210 :
211 : protected:
212 : std::unique_ptr<tiledb::Context> m_roCtx;
213 : std::unique_ptr<tiledb::Array> m_array;
214 : std::unique_ptr<tiledb::Array> m_roArray;
215 : std::unique_ptr<tiledb::ArraySchema> m_schema;
216 : std::unique_ptr<tiledb::FilterList> m_filterList;
217 : CPLString osMetaDoc;
218 : TILEDB_INTERLEAVE_MODE eIndexMode = BAND;
219 : int nBitsPerSample = 8;
220 : GDALDataType eDataType = GDT_Unknown;
221 : int nBlockXSize = -1;
222 : int nBlockYSize = -1;
223 : int nBlocksX = 0;
224 : int nBlocksY = 0;
225 : uint64_t nBandStart = 1;
226 : bool bHasSubDatasets = false;
227 : int nSubDataCount = 0;
228 : char **papszSubDatasets = nullptr;
229 : CPLStringList m_osSubdatasetMD{};
230 : CPLXMLNode *psSubDatasetsTree = nullptr;
231 : char **papszAttributes = nullptr;
232 : std::list<std::unique_ptr<GDALDataset>> lpoAttributeDS = {};
233 : uint64_t nTimestamp = 0;
234 :
235 : bool bStats = FALSE;
236 : CPLErr IRasterIO(GDALRWFlag, int, int, int, int, void *, int, int,
237 : GDALDataType, int, int *, GSpacing, GSpacing, GSpacing,
238 : GDALRasterIOExtraArg *psExtraArg) override;
239 : CPLErr CreateAttribute(GDALDataType eType, const CPLString &osAttrName,
240 : const int nSubRasterCount = 1);
241 :
242 : CPLErr AddDimensions(tiledb::Domain &domain, const char *pszAttrName,
243 : tiledb::Dimension &y, tiledb::Dimension &x,
244 : tiledb::Dimension *poBands = nullptr);
245 :
246 : public:
247 : ~TileDBRasterDataset();
248 : CPLErr TryLoadCachedXML(char **papszSiblingFiles = nullptr,
249 : bool bReload = true);
250 : CPLErr TryLoadXML(char **papszSiblingFiles = nullptr) override;
251 : CPLErr TrySaveXML() override;
252 : char **GetMetadata(const char *pszDomain) override;
253 : virtual CPLErr FlushCache(bool bAtClosing) override;
254 : static CPLErr CopySubDatasets(GDALDataset *poSrcDS,
255 : TileDBRasterDataset *poDstDS,
256 : GDALProgressFunc pfnProgress,
257 : void *pProgressData);
258 : static TileDBRasterDataset *CreateLL(const char *pszFilename, int nXSize,
259 : int nYSize, int nBands,
260 : GDALDataType eType,
261 : char **papszOptions);
262 : static void SetBlockSize(GDALRasterBand *poBand, char **&papszOptions);
263 :
264 : static GDALDataset *Open(GDALOpenInfo *);
265 : static GDALDataset *Create(const char *pszFilename, int nXSize, int nYSize,
266 : int nBands, GDALDataType eType,
267 : char **papszOptions);
268 : static GDALDataset *CreateCopy(const char *pszFilename,
269 : GDALDataset *poSrcDS, int bStrict,
270 : char **papszOptions,
271 : GDALProgressFunc pfnProgress,
272 : void *pProgressData);
273 : };
274 :
275 : /************************************************************************/
276 : /* OGRTileDBLayer */
277 : /************************************************************************/
278 :
279 : class OGRTileDBDataset;
280 :
/**
 * Vector layer of the TileDB driver.
 *
 * Derives from OGRLayer and from OGRGetNextFeatureThroughRaw<>, which
 * supplies GetNextFeature() on top of the private GetNextRawFeature().
 * Besides the feature-oriented interface, the layer overrides
 * GetArrowSchema() and GetNextArrowArray().
 *
 * Member declaration order is significant for destruction order and must be
 * preserved.
 */
class OGRTileDBLayer final : public OGRLayer,
                             public OGRGetNextFeatureThroughRaw<OGRTileDBLayer>
{
  public:
    // Type-erased holder of one field's value buffer. The VECTOR_OF_BOOL
    // alternative is only listed when it is a distinct type; otherwise
    // VECTOR_OF_BOOL is std::vector<uint8_t>, which is already an alternative.
    typedef std::variant<std::shared_ptr<std::string>,
#ifdef VECTOR_OF_BOOL_IS_NOT_UINT8_T
                         std::shared_ptr<VECTOR_OF_BOOL>,
#endif
                         std::shared_ptr<std::vector<uint8_t>>,
                         std::shared_ptr<std::vector<int16_t>>,
                         std::shared_ptr<std::vector<uint16_t>>,
                         std::shared_ptr<std::vector<int32_t>>,
                         std::shared_ptr<std::vector<int64_t>>,
                         std::shared_ptr<std::vector<float>>,
                         std::shared_ptr<std::vector<double>>>
        ArrayType;

  private:
    friend OGRTileDBDataset;
    // Dataset handed to the constructor and returned by GetDataset();
    // stored as a raw, non-owning pointer.
    GDALDataset *m_poDS = nullptr;
    std::string m_osGroupName{};
    std::string m_osFilename{};
    uint64_t m_nTimestamp = 0;
    bool m_bUpdatable = false;

    // Whether a read or a write is currently in progress, if any.
    enum class CurrentMode
    {
        None,
        ReadInProgress,
        WriteInProgress
    };
    CurrentMode m_eCurrentMode = CurrentMode::None;
    std::unique_ptr<tiledb::Context> m_ctx;
    std::unique_ptr<tiledb::Array> m_array;
    std::unique_ptr<tiledb::ArraySchema> m_schema;
    std::unique_ptr<tiledb::Query> m_query;
    std::unique_ptr<tiledb::FilterList> m_filterList;
    bool m_bAttributeFilterPartiallyTranslated =
        false;  // for debugging purposes
    bool m_bAttributeFilterAlwaysFalse = false;
    bool m_bAttributeFilterAlwaysTrue = false;
    std::unique_ptr<tiledb::QueryCondition> m_poQueryCondition;
    bool m_bInitializationAttempted = false;
    bool m_bInitialized = false;
    // Returned by GetLayerDefn().
    OGRFeatureDefn *m_poFeatureDefn = nullptr;
    // Returned (as a C string) by GetFIDColumn().
    std::string m_osFIDColumn{};
    GIntBig m_nNextFID = 1;
    int64_t m_nTotalFeatureCount = -1;
    bool m_bStats = false;
    bool m_bQueryComplete = false;
    bool m_bGrowBuffers = false;
    uint64_t m_nOffsetInResultSet = 0;
    uint64_t m_nRowCountInResultSet = 0;
    int m_nUseOptimizedAttributeFilter = -1;  // uninitialized

    tiledb_datatype_t m_eTileDBStringType = TILEDB_STRING_UTF8;

    // Names of the spatial dimensions.
    std::string m_osXDim = "_X";
    std::string m_osYDim = "_Y";
    std::string m_osZDim;  // may be empty

    // Domain extent
    double m_dfXStart = 0;
    double m_dfYStart = 0;
    double m_dfZStart = -10000;
    double m_dfXEnd = 0;
    double m_dfYEnd = 0;
    double m_dfZEnd = 10000;

    // Extent of all features
    OGREnvelope m_oLayerExtent;

    // Boolean shared between the OGRTileDBLayer instance and the
    // OGRTileDBArrowArrayPrivateData instances, that are stored in
    // ArrowArray::private_data, so the ReleaseArrowArray() function knows
    // if the OGRLayer is still alive.
    std::shared_ptr<bool> m_pbLayerStillAlive;

    // Flag set to false by GetNextArrowArray() to indicate that the m_anFIDs,
    // m_adfXs, m_adfYs, m_adfZs, m_aFieldValues, m_aFieldValueOffsets,
    // m_abyGeometries and m_anGeometryOffsets are currently used by an
    // ArrowArray returned. If this flag is still set to false when the
    // next SetupQuery() is called, we need to re-instantiate new arrays, so
    // the ArrowArray's can be used independently of the new state of the layer.
    bool m_bArrowBatchReleased = true;

    // Batch buffers. Those held through std::shared_ptr are the ones the
    // comment above describes as possibly in use by a returned ArrowArray.
    std::shared_ptr<std::vector<int64_t>> m_anFIDs;
    std::shared_ptr<std::vector<double>> m_adfXs;
    std::shared_ptr<std::vector<double>> m_adfYs;
    std::shared_ptr<std::vector<double>> m_adfZs;
    std::vector<tiledb_datatype_t> m_aeFieldTypes{};
    std::vector<int> m_aeFieldTypesInCreateField{};
    std::vector<size_t> m_anFieldValuesCapacity{};
    std::vector<ArrayType> m_aFieldValues;
    std::vector<std::shared_ptr<std::vector<uint64_t>>> m_aFieldValueOffsets;
    std::vector<std::vector<uint8_t>> m_aFieldValidity;
    size_t m_nGeometriesCapacity = 0;
    std::shared_ptr<std::vector<unsigned char>> m_abyGeometries;
    std::shared_ptr<std::vector<uint64_t>> m_anGeometryOffsets;

    // Payload stored in ArrowArray::private_data (see the comment on
    // m_pbLayerStillAlive). It holds shared ownership of the value, null and
    // offset buffers plus the layer-alive flag.
    struct OGRTileDBArrowArrayPrivateData
    {
        OGRTileDBLayer *m_poLayer = nullptr;
        std::shared_ptr<bool> m_pbLayerStillAlive;

        ArrayType valueHolder;
        std::shared_ptr<std::vector<uint8_t>> nullHolder;
        std::shared_ptr<std::vector<uint64_t>> offsetHolder;
    };

    size_t m_nBatchSize = DEFAULT_BATCH_SIZE;
    size_t m_nTileCapacity = DEFAULT_TILE_CAPACITY;
    double m_dfTileExtent = 0;
    double m_dfZTileExtent = 0;
    size_t m_nEstimatedWkbSizePerRow = 0;
    std::map<std::string, size_t> m_oMapEstimatedSizePerRow;
    double m_dfPadX = 0;
    double m_dfPadY = 0;
    double m_dfPadZ = 0;

    const char *GetDatabaseGeomColName();
    void InitializeSchemaAndArray();
    void FlushArrays();
    void AllocateNewBuffers();
    void ResetBuffers();
    void SwitchToReadingMode();
    void SwitchToWritingMode();
    bool InitFromStorage(tiledb::Context *poCtx, uint64_t nTimestamp,
                         CSLConstList papszOpenOptions);
    void SetReadBuffers(bool bGrowVariableSizeArrays);
    bool SetupQuery(tiledb::QueryCondition *queryCondition);
    OGRFeature *TranslateCurrentFeature();

    // Raw iteration primitive used by DEFINE_GET_NEXT_FEATURE_THROUGH_RAW.
    OGRFeature *GetNextRawFeature();

    // Build a TileDB query condition from an SQL expression node.
    // bAlwaysTrue and bAlwaysFalse are output flags.
    // NOTE(review): presumably they report a filter that degenerates to a
    // constant; confirm against the definition.
    std::unique_ptr<tiledb::QueryCondition>
    CreateQueryCondition(const swq_expr_node *poNode, bool &bAlwaysTrue,
                         bool &bAlwaysFalse);
    std::unique_ptr<tiledb::QueryCondition> CreateQueryCondition(
        int nOperation, bool bColumnIsLeft, const swq_expr_node *poColumn,
        const swq_expr_node *poValue, bool &bAlwaysTrue, bool &bAlwaysFalse);

    // Arrow helpers: ReleaseArrowArray() is the static release function
    // mentioned in the comment on m_pbLayerStillAlive. Each Fill*() /
    // SetNullBuffer() helper takes the child array, the field index and a
    // per-row validity vector.
    static void ReleaseArrowArray(struct ArrowArray *array);
    void FillBoolArray(struct ArrowArray *psChild, int iField,
                       const std::vector<bool> &abyValidityFromFilters);
    void SetNullBuffer(struct ArrowArray *psChild, int iField,
                       const std::vector<bool> &abyValidityFromFilters);
    template <typename T>
    void FillPrimitiveArray(struct ArrowArray *psChild, int iField,
                            const std::vector<bool> &abyValidityFromFilters);
    void FillBoolListArray(struct ArrowArray *psChild, int iField,
                           const std::vector<bool> &abyValidityFromFilters);
    template <typename T>
    void
    FillPrimitiveListArray(struct ArrowArray *psChild, int iField,
                           const std::vector<bool> &abyValidityFromFilters);
    template <typename T>
    void
    FillStringOrBinaryArray(struct ArrowArray *psChild, int iField,
                            const std::vector<bool> &abyValidityFromFilters);
    void FillTimeOrDateArray(struct ArrowArray *psChild, int iField,
                             const std::vector<bool> &abyValidityFromFilters);
    int GetArrowSchema(struct ArrowArrayStream *,
                       struct ArrowSchema *out_schema) override;
    int GetNextArrowArray(struct ArrowArrayStream *,
                          struct ArrowArray *out_array) override;

  public:
    OGRTileDBLayer(GDALDataset *poDS, const char *pszFilename,
                   const char *pszLayerName, const OGRwkbGeometryType eGType,
                   const OGRSpatialReference *poSRS);
    ~OGRTileDBLayer();
    void ResetReading() override;
    // Macro from the OGR headers that defines GetNextFeature() in terms of
    // GetNextRawFeature().
    DEFINE_GET_NEXT_FEATURE_THROUGH_RAW(OGRTileDBLayer)
    OGRFeature *GetFeature(GIntBig nFID) override;
    OGRErr ICreateFeature(OGRFeature *poFeature) override;
    OGRErr CreateField(const OGRFieldDefn *poField, int bApproxOK) override;
    int TestCapability(const char *) override;
    GIntBig GetFeatureCount(int bForce) override;
    OGRErr GetExtent(OGREnvelope *psExtent, int bForce = TRUE) override;

    // Forwards unchanged to the base-class implementation.
    // NOTE(review): presumably present so the overload above does not hide
    // this signature; confirm.
    OGRErr GetExtent(int iGeomField, OGREnvelope *psExtent, int bForce) override
    {
        return OGRLayer::GetExtent(iGeomField, psExtent, bForce);
    }

    // The returned pointer refers to m_osFIDColumn's internal storage.
    const char *GetFIDColumn() override
    {
        return m_osFIDColumn.c_str();
    }

    OGRFeatureDefn *GetLayerDefn() override
    {
        return m_poFeatureDefn;
    }

    OGRErr SetAttributeFilter(const char *pszFilter) override;

    const char *GetMetadataItem(const char *pszName,
                                const char *pszDomain) override;

    GDALDataset *GetDataset() override
    {
        return m_poDS;
    }
};
485 :
486 : /************************************************************************/
487 : /* OGRTileDBDataset */
488 : /************************************************************************/
489 :
490 : class OGRTileDBDataset final : public TileDBDataset
491 : {
492 : friend OGRTileDBLayer;
493 : std::string m_osGroupName{};
494 : std::vector<std::unique_ptr<OGRLayer>> m_apoLayers{};
495 :
496 : public:
497 : OGRTileDBDataset();
498 : ~OGRTileDBDataset();
499 : OGRLayer *ExecuteSQL(const char *pszSQLCommand,
500 : OGRGeometry *poSpatialFilter,
501 : const char *pszDialect) override;
502 :
503 102 : int GetLayerCount() override
504 : {
505 102 : return static_cast<int>(m_apoLayers.size());
506 : }
507 :
508 64 : OGRLayer *GetLayer(int nIdx) override
509 : {
510 64 : return nIdx >= 0 && nIdx < GetLayerCount() ? m_apoLayers[nIdx].get()
511 64 : : nullptr;
512 : }
513 :
514 : int TestCapability(const char *) override;
515 :
516 : OGRLayer *ICreateLayer(const char *pszName,
517 : const OGRGeomFieldDefn *poGeomFieldDefn,
518 : CSLConstList papszOptions) override;
519 :
520 : static GDALDataset *Open(GDALOpenInfo *, tiledb::Object::Type objectType);
521 : static GDALDataset *Create(const char *pszFilename,
522 : CSLConstList papszOptions);
523 : };
524 :
525 : #endif // TILEDB_HEADERS_H
|